Code example #1
File: __init__.py  Project: bityangke/ikelos
from collections import Counter

# `utils.xor` and `Vocabulary` are provided elsewhere in the ikelos project.
def to_vocab(data, frequency_cutoff=None, size_cutoff=None):
    # Exactly one of the two cutoffs must be supplied.
    if not utils.xor(frequency_cutoff, size_cutoff):
        raise Exception("one or the other cutoffs please")

    counter = Counter(word for sent in data for word in sent)

    if frequency_cutoff is not None:
        print("Using a frequency of {} to reduce vocabulary size.".format(
            frequency_cutoff))
        words = [
            word for word, count in counter.most_common()
            if count > frequency_cutoff
        ]
        print("Vocabulary size reduced. {} -> {}".format(
            len(counter), len(words)))

    elif size_cutoff is not None:
        print("Using a cutoff of {} to reduce vocabulary size.".format(
            size_cutoff))
        words = [word for word, count in counter.most_common(size_cutoff)]
        print("Vocabulary size reduced. {} -> {}".format(
            len(counter), len(words)))

    else:
        raise Exception("should never happen...")

    vocab = Vocabulary(use_mask=True)
    vocab.add_many(['<START>', '<END>'])  # sentence-boundary tokens go in before the corpus words
    vocab.add_many(words)
    return vocab
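A minimal usage sketch, assuming data is an iterable of tokenized sentences (as the Counter comprehension implies); the cutoff values here are arbitrary:

sentences = [["the", "cat", "sat"], ["the", "dog", "ran"], ["the", "cat", "ran"]]
vocab = to_vocab(sentences, size_cutoff=10000)      # keep the 10,000 most frequent words
# vocab = to_vocab(sentences, frequency_cutoff=5)   # or keep words seen more than 5 times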
Code example #2
import json
import os
from os import path

import numpy as np
from tqdm import tqdm

# Vocabulary, hash_word, and glorot_uniform are assumed to come from the
# surrounding ikelos / Keras code in this project.
def make_hash_embeddings(igor, vocab):
    assert os.path.exists(igor.target_glove), "You need to specify a real file"
    fileiter = open(igor.target_glove).readlines()

    hash_vocab = Vocabulary()
    hash_vocab.use_mask = True
    hash_vocab.add(hash_vocab.mask_symbol)
    hash_vocab.add(hash_vocab.unk_symbol)
    word2hash = {}
    for word, v_id in vocab.items():
        ids = hash_vocab.add_many(hash_word(word))
        word2hash[v_id] = ids

    embeddings = np.zeros((len(hash_vocab), igor.embedding_size))
    remaining_vocab = set(vocab.keys())
    remaining_hashes = set(hash_vocab.values())
    for line in tqdm(fileiter):
        # Each GloVe line is the word followed by its embedding values.
        line = line.replace("\n", "").split(" ")
        word, nums = line[0], [float(x.strip()) for x in line[1:]]
        if word in remaining_vocab:
            hash_ids = word2hash[vocab[word]]
            remaining_vocab.remove(word)
            remaining_hashes.difference_update(hash_ids)
            # Spread the word's vector evenly across its hash buckets.
            embeddings[hash_ids] += np.array(nums) / len(hash_ids)
    print("{} words were not seen.  {} hashes were not seen".format(len(remaining_vocab),
                                                                    len(remaining_hashes)))
    # Hash buckets that no GloVe word touched get a random Glorot-uniform init.
    for hash_id in remaining_hashes:
        embeddings[hash_id] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())

    glove_name = igor.target_glove[igor.target_glove.find("glove"):].replace("/","")

    hash_vocab.save('hash_embedding_{}.vocab'.format(glove_name))
    with open(path.join(igor.save_dir, "hash_embedding_{}.npy".format(glove_name)), "wb") as fp:
        np.save(fp, embeddings)
    with open(path.join(igor.save_dir, "word2hash_{}.json".format(glove_name)), "w") as fp:
        json.dump(word2hash, fp)
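A hedged usage sketch for the function above: igor is assumed to be any object exposing the target_glove, embedding_size, and save_dir attributes the body reads; the file name and sizes are placeholders.

from types import SimpleNamespace

# Hypothetical configuration object; the attribute names mirror what the
# function body above reads from `igor`.
igor = SimpleNamespace(
    target_glove="glove.6B.100d.txt",  # path to a GloVe text file
    embedding_size=100,                # must match the GloVe vector dimension
    save_dir=".",
)
vocab = to_vocab(sentences, size_cutoff=10000)   # vocabulary built as in code example #1
make_hash_embeddings(igor, vocab)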