def get_match_result(patterns, pattern2attributes, tokens):
    """Match the final patterns against a sentence and extract values.

    If ';' occurs in the sentence, the token list is first cut into
    chips and each chip is matched independently; otherwise the value
    positions are taken directly from the whole token list.

    patterns: the final patterns chosen for this sentence
    pattern2attributes: maps each pattern to its attribute name
    tokens: the sentence as a list of Token objects
    Returns a dict mapping attribute name -> extracted value string.
    """
    attributes2value = {}
    seg_token = Token(';', ':')
    if contain_token(seg_token, tokens):
        # Later chips overwrite earlier ones for the same attribute
        # (same behavior as the no-semicolon branch across sentences).
        chips = cut_token_list(tokens, [seg_token])
        for chip in chips:
            value_pos = get_value_pos(patterns, chip)
            logger.info(str(patterns) + 'value_pos' + str(value_pos))
            # BUG FIX: the assignment used to sit outside the pattern
            # loop, so per chip only patterns[0] ever received a value
            # and every other matched pattern was silently dropped.
            _collect_values(patterns, pattern2attributes, chip,
                            value_pos, attributes2value)
    else:
        value_pos = get_value_pos(patterns, tokens)
        logger.info(str(patterns) + ' value_pos: ' + str(value_pos))
        _collect_values(patterns, pattern2attributes, tokens,
                        value_pos, attributes2value)
    return attributes2value


def _render_value(token_slice):
    # Rebuild surface text from a token slice: '.', ';', ',' glue onto
    # the previous word, everything else is space-separated; a single
    # trailing '.', ';' or ',' is dropped from the result.
    sent = ""
    for token in token_slice:
        if token.word in ['.', ';', ',']:
            sent = sent.strip() + token.word + " "
        else:
            sent += token.word + " "
    sent = sent.strip()
    if len(sent) > 0 and sent[-1] in [';', ',', '.']:
        sent = sent[:-1]
    return sent


def _collect_values(patterns, pattern2attributes, tokens, value_pos,
                    attributes2value):
    # Record one value per pattern, walking patterns from last to first
    # (preserves the original iteration order and overwrite semantics).
    for i in range(len(patterns) - 1, -1, -1):
        start, end = value_pos[i][0], value_pos[i][1]
        attribute = pattern2attributes[patterns[i]]
        attributes2value[attribute] = _render_value(tokens[start:end])
def get_fix(size, attribute_value_tokens, definition_tokens):
    """Collect prefix/intersect results for every ';'-separated chunk.

    The attribute-value tokens are split on ';' and each chunk is run
    through get_fix_chunk; the per-chunk results are returned as one
    flat list, in chunk order.
    """
    separators = [Token(';', ':')]
    return [
        fix
        for chunk in cut_token_list(attribute_value_tokens, separators)
        for fix in get_fix_chunk(size, chunk, definition_tokens)
    ]
def get_tokens(pos_words):
    """Wrap (word, tag) pairs from the POS tagger into Token objects."""
    return [Token(pair[0], pair[1]) for pair in pos_words]
def process_definition(definition, pattern2attrubute):
    """Extract attribute -> value pairs from one free-text definition.

    The definition is POS-tagged with nltk, split into sentences on
    '.' / ';' tokens, and each sentence is matched against the known
    patterns. Values for an attribute found in several sentences are
    joined with '; '. Per-phase timings (in microseconds) are logged
    and accumulated into the module-level counters.

    definition: raw definition string
    pattern2attrubute: maps pattern -> attribute name (parameter name
        kept as-is, typo included, for caller compatibility)
    Returns a dict mapping attribute name -> extracted value string.
    """
    attributes2value = {}
    logger.info('definition: %s' % definition)
    if definition.strip().startswith('See') or definition.strip().startswith(
            'see'):
        # NOTE(review): the result is ignored and processing continues;
        # an early `return` here looks intended — confirm before changing.
        process_vacant_definition(definition)
    start = datetime.datetime.now()
    text = nltk.word_tokenize(definition)
    def_pos = nltk.pos_tag(text)
    def_tokens = pos_word2tokens(def_pos)
    logger.info(def_pos)
    end = datetime.datetime.now()
    global tag_time_all
    # BUG FIX: timedelta.microseconds is only the sub-second component
    # and wraps at one second; total_seconds() counts the full duration.
    tag_elapsed = int((end - start).total_seconds() * 1e6)
    tag_time_all += tag_elapsed
    logger.info('tagging time:%d ' % tag_elapsed)
    # Sentence boundaries: '.' and ';' tokens.
    seg_point_token = [Token('.', '.'), Token(';', ':')]
    sents_tokens = cut_token_list(def_tokens, seg_point_token)
    # Timers were previously "zeroed" by subtracting two adjacent
    # now() calls; start them at an honest zero instead.
    time_find_candidate_pattern = 0
    time_choice_final_pattern = 0
    time_get_match_result = 0
    for sent_tokens in sents_tokens:
        start = datetime.datetime.now()
        candidate_patterns = find_candidate_pattern(pattern2attrubute.keys(),
                                                    sent_tokens)
        end = datetime.datetime.now()
        elapsed = int((end - start).total_seconds() * 1e6)
        time_find_candidate_pattern += elapsed
        logger.info('find candidate pattern time: ' + str(elapsed))
        logger.info("candidate_patterns: " + str(candidate_patterns))

        if len(candidate_patterns) == 0:
            continue
        start = datetime.datetime.now()
        choiced_patterns = choice_final_pattern(candidate_patterns,
                                                sent_tokens)
        end = datetime.datetime.now()
        elapsed = int((end - start).total_seconds() * 1e6)
        time_choice_final_pattern += elapsed
        logger.info('choice final pattern time: ' + str(elapsed))
        logger.info("choiced_patterns: " + str(choiced_patterns))

        start = datetime.datetime.now()
        attributes2value_part = get_match_result(choiced_patterns,
                                                 pattern2attrubute,
                                                 sent_tokens)
        # Merge: an attribute seen in an earlier sentence gets the new
        # value appended with '; '. (.items() works on both py2 and
        # py3; the original used py2-only iteritems().)
        for attribute, value in attributes2value_part.items():
            if attribute in attributes2value:
                attributes2value[attribute] = (
                    attributes2value[attribute] + '; ' + value)
            else:
                attributes2value[attribute] = value
        end = datetime.datetime.now()
        elapsed = int((end - start).total_seconds() * 1e6)
        time_get_match_result += elapsed
        logger.info('get match result time: ' + str(elapsed))
        logger.info("attributes2value: " + str(attributes2value))
    global find_candidate_time
    find_candidate_time += time_find_candidate_pattern
    logger.info('time_find_candidate_pattern: ' +
                str(time_find_candidate_pattern))
    logger.info('time_choice_final_pattern: ' + str(time_choice_final_pattern))
    logger.info('time_get_match_result: ' + str(time_get_match_result))
    logger.info("whole attributes2value: " + str(attributes2value))
    return attributes2value
def pos_word2tokens(pos_words):
    """Wrap (word, tag) pairs from the POS tagger into Token objects.

    NOTE(review): this was a byte-for-byte duplicate of get_tokens
    above; kept as an alias for existing callers but delegated so
    there is a single implementation to maintain.
    """
    return get_tokens(pos_words)