Python dynamic_lengthの例

プログラミング言語: Python

名前空間/パッケージ名: src.utils.nlp

メソッド/関数: dynamic_length

hotexamples.comのコード掲載数: 2

Python dynamic_length - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsrc.utils.nlp.dynamic_lengthの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

    def count_data_and_build_dict(self, data_list, gene_dicts=True):
        """Gather length statistics from ``data_list`` and optionally build vocabularies.

        Args:
            data_list: iterable of samples. Each sample is an iterable of
                tree nodes indexable by ``'token_seq'`` (a sequence of
                tokens) and ``'char_seq'`` (one character sequence per
                token).
            gene_dicts: when true, build the token / char / glove
                vocabularies; otherwise an empty dict is returned instead.

        Returns:
            A ``(dicts, max_lens)`` tuple. ``dicts`` is either ``{}`` or a
            dict with keys ``'token'``, ``'char'`` and ``'glove'``;
            ``max_lens`` is ``{'sent': ..., 'token': ...}`` holding the
            first element returned by ``dynamic_length`` for the sentence
            lengths and the token lengths respectively.
        """
        def add_ept_and_unk(a_list):
            # Reserve index 0 for the empty marker and index 1 for unknown.
            # The list is modified in place and returned for convenience.
            a_list.insert(0, '@@@empty')
            a_list.insert(1, '@@@unk')
            return a_list

        _logger.add()
        _logger.add('counting and build dictionaries')

        token_collection = []
        char_collection = []

        sent_len_collection = []
        token_len_collection = []

        for sample in data_list:
            for tree_node in sample:
                token_collection += tree_node['token_seq']
                sent_len_collection.append(len(tree_node['token_seq']))
                for char_seq in tree_node['char_seq']:
                    char_collection += char_seq
                    token_len_collection.append(len(char_seq))

        # NOTE(review): the second argument looks like a coverage ratio
        # (1 = keep everything, 0.99 = cover 99%) -- confirm against
        # src.utils.nlp.dynamic_length.
        max_sent_len = dynamic_length(sent_len_collection, 1, security=False)[0]
        max_token_len = dynamic_length(token_len_collection, 0.99, security=False)[0]

        if gene_dicts:
            # token & char
            tokenSet = dynamic_keep(token_collection, 1)
            charSet = dynamic_keep(char_collection, 1)
            if cfg.use_glove_unk_token:
                gloveData = load_glove(cfg.word_embedding_length)
                gloveTokenSet = list(gloveData.keys())
                if cfg.lower_word:
                    # Lower-casing can merge distinct tokens, so deduplicate.
                    tokenSet = list(set([token.lower() for token in tokenSet]))
                    gloveTokenSet = list(set([token.lower() for token in gloveTokenSet]))

                # Drop every glove token that already appears in tokenSet.
                # The glove tokens are unique here (mapping keys, or just
                # deduplicated through a set), so a single filtering pass
                # yields the same list, in the same order, as calling
                # list.remove() once per token -- without rescanning the
                # whole glove vocabulary for each token.
                known_tokens = set(tokenSet)
                gloveTokenSet = [token for token in gloveTokenSet
                                 if token not in known_tokens]
            else:
                if cfg.lower_word:
                    tokenSet = list(set([token.lower() for token in tokenSet]))
                gloveTokenSet = []
            tokenSet = add_ept_and_unk(tokenSet)
            charSet = add_ept_and_unk(charSet)
            dicts = {'token': tokenSet, 'char': charSet, 'glove': gloveTokenSet}
        else:
            dicts = {}

        _logger.done()
        return dicts, {'sent': max_sent_len, 'token': max_token_len}

コード例 #2

ファイルを表示

ファイル: dataset.py プロジェクト: zwjyyc/BiBloSA

    def count_data_and_build_dict(dataset, sent_len_rate, gene_dicts=True):
        """Gather length statistics from ``dataset`` and optionally build vocabularies.

        Args:
            dataset: iterable of topics. Each topic is indexable by
                ``'paragraphs'``; each paragraph by ``'context_token'``
                (a sequence of sentences, each a sequence of tokens) and
                ``'qas'`` (an iterable of items indexable by
                ``'question_token'``).
            sent_len_rate: passed to ``dynamic_length`` as the second
                argument when computing the sentence-length limit.
            gene_dicts: when true, build the token / glove vocabularies;
                otherwise an empty dict is returned instead.

        Returns:
            A ``(dicts, max_lens)`` tuple. ``dicts`` is either ``{}`` or a
            dict with keys ``'token'`` and ``'glove'``; ``max_lens`` has the
            keys ``'sent_num'``, ``'sent_len'`` and ``'question'``, each
            holding the first element returned by ``dynamic_length``.
        """
        def add_ept_and_unk(a_list):
            # Reserve index 0 for the empty marker and index 1 for unknown.
            # The list is modified in place and returned for convenience.
            a_list.insert(0, '@@@empty')
            a_list.insert(1, '@@@unk')
            return a_list

        _logger.add()
        _logger.add('counting and build dictionaries')

        token_collection = []
        sent_num_collection = []
        sent_len_collection = []
        question_len_collection = []

        for topic in dataset:
            for paragraph in topic['paragraphs']:
                sent_num_collection.append(len(paragraph['context_token']))
                for sent_token in paragraph['context_token']:
                    sent_len_collection.append(len(sent_token))
                    token_collection += sent_token
                for qa in paragraph['qas']:
                    question_len_collection.append(len(qa['question_token']))
                    token_collection += qa['question_token']

        _logger.done()

        # NOTE(review): the second argument looks like a coverage ratio
        # (1. = keep everything) -- confirm against
        # src.utils.nlp.dynamic_length.
        max_sent_num, _ = dynamic_length(sent_num_collection, 1.)
        max_sent_len, _ = dynamic_length(sent_len_collection, sent_len_rate)
        max_question_len, _ = dynamic_length(question_len_collection, 0.995)

        if gene_dicts:
            tokenSet = dynamic_keep(token_collection, 0.995)
            if cfg.use_glove_unk_token:
                gloveData = load_glove(cfg.word_embedding_length)
                gloveTokenSet = list(gloveData.keys())
                if cfg.lower_word:
                    # Lower-casing can merge distinct tokens, so deduplicate.
                    tokenSet = list(set([token.lower()
                                         for token in tokenSet]))
                    gloveTokenSet = list(
                        set([token.lower() for token in gloveTokenSet]))

                # Drop every glove token that already appears in tokenSet.
                # The glove tokens are unique here (mapping keys, or just
                # deduplicated through a set), so a single filtering pass
                # yields the same list, in the same order, as calling
                # list.remove() once per token -- without rescanning the
                # whole glove vocabulary for each token.
                known_tokens = set(tokenSet)
                gloveTokenSet = [token for token in gloveTokenSet
                                 if token not in known_tokens]
            else:
                if cfg.lower_word:
                    tokenSet = list(set([token.lower() for token in tokenSet]))
                gloveTokenSet = []
            tokenSet = add_ept_and_unk(tokenSet)
            dicts = {'token': tokenSet, 'glove': gloveTokenSet}
        else:
            dicts = {}
        _logger.done()
        return dicts, {
            'sent_num': max_sent_num,
            'sent_len': max_sent_len,
            'question': max_question_len
        }