Example #1
class Lookup:
    def __init__(self, config):
        """
        Load the pretrained word vectors named by the configuration.
        """
        if config.word_vector == 'word2vec':
            logger.info("Loading word2vec from disk ...")
            self.model = Word2Vec.load_word2vec_format(config.word_vector_path, binary=True)
        print("Loading done...")

        self.full_alphabet = Alphabet("full_lookup")
Example #2
def read_conll(path, label_alphabet=None):
    """Read a CoNLL-format file into word and POS sentences, building the alphabets along the way."""
    word_sentences = []
    pos_sentences = []
    words = []
    poses = []

    word_alphabet = Alphabet('word', (padding_symbol,))

    if label_alphabet is None:
        label_alphabet = Alphabet('label', (padding_symbol,))

    with open(path) as f:
        for l in f:
            if l.strip() == "":
                word_sentences.append(words[:])
                pos_sentences.append(poses[:])
                words = []
                poses = []
            else:
                parts = l.split()
                word = parts[1]
                pos = parts[4]
                words.append(word)
                poses.append(pos)
                word_alphabet.add(word)
                label_alphabet.add(pos)

    # Add the last sentence in.
    if len(words) > 0:
        word_sentences.append(words[:])
        pos_sentences.append(poses[:])

    return word_sentences, pos_sentences, word_alphabet, label_alphabet
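
A minimal usage sketch (an assumption, not part of the original example): the file names train.conll and dev.conll are hypothetical, and the label alphabet built on the training split is passed back in so the dev split maps POS tags to the same indices.

# Build alphabets from the training split.
train_words, train_pos, word_alphabet, label_alphabet = read_conll("train.conll")
# Reuse the training label alphabet for the dev split.
dev_words, dev_pos, dev_word_alphabet, _ = read_conll("dev.conll", label_alphabet=label_alphabet)
print("%d training sentences, %d dev sentences" % (len(train_words), len(dev_words)))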
Example #3
def read_conll(path):
    """Read a CoNLL-format file into word and POS sentences, building fresh word and POS alphabets."""
    word_sentences = []
    pos_sentences = []
    words = []
    poses = []

    word_alphabet = Alphabet('word', (padding_symbol,))
    pos_alphabet = Alphabet('pos', (padding_symbol,))

    with open(path) as f:
        for l in f:
            if l.strip() == "":
                word_sentences.append(words[:])
                pos_sentences.append(poses[:])
                words = []
                poses = []
            else:
                # CoNLL columns: index 1 is the word form, index 4 the POS tag.
                parts = l.split()
                word = parts[1]
                pos = parts[4]
                words.append(word)
                poses.append(pos)
                word_alphabet.add(word)
                pos_alphabet.add(pos)

    # Flush the last sentence if the file does not end with a blank line.
    if len(words) > 0:
        word_sentences.append(words[:])
        pos_sentences.append(poses[:])

    return word_sentences, pos_sentences, word_alphabet, pos_alphabet
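
Every example here relies on the project's Alphabet class, which none of the snippets define. The following is a minimal sketch of the interface they exercise (a reserved default index, optional special symbols such as padding_symbol, add, has_instance, and Python 2 style iteritems); it is an assumption about the interface, not the project's actual implementation, and the load/save persistence used in Examples #4 and #5 is omitted.

class Alphabet:
    # Index 0 is reserved as the default slot (e.g. for unknown words),
    # matching the Alphabet.default_index == 0 check in Example #6.
    default_index = 0

    def __init__(self, name, special_instances=()):
        self.name = name
        self.instance2index = {}
        for instance in special_instances:
            self.add(instance)

    def add(self, instance):
        # Assign the next free index to an unseen instance.
        if instance not in self.instance2index:
            self.instance2index[instance] = len(self.instance2index) + 1

    def has_instance(self, instance):
        return instance in self.instance2index

    def iteritems(self):
        # Yields (instance, index) pairs, as the lookup code expects.
        return iter(self.instance2index.items())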
Example #4
def read_models(model_base, data_name, model):
    """Load the requested trained model(s), with their POS and word alphabets, from disk."""
    logger.info("Loading models from disk.")

    models = {}

    models_to_load = ['auto', 'vanilla'] if model == 'all' else [model]

    for t in models_to_load:
        # Use a distinct name so the 'model' parameter is not shadowed.
        learner = BaseLearner()
        model_dir = os.path.join(model_base, data_name, t)
        learner.load(model_dir)

        pos_alphabet = Alphabet('pos')
        word_alphabet = Alphabet('word')

        pos_alphabet.load(model_dir)
        word_alphabet.load(model_dir)

        models[t] = (learner, pos_alphabet, word_alphabet)

    logger.info("Loading done.")

    return models
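
A hypothetical call (the model_base "models" and data_name "wsj" are made up for illustration), assuming each model was saved under <model_base>/<data_name>/<model name> together with its serialized alphabets:

models = read_models("models", "wsj", "all")  # loads both the 'auto' and 'vanilla' models
learner, pos_alphabet, word_alphabet = models["vanilla"]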
Example #5
def read_models(model_base, data_name, model):
    """Load the requested trained model(s), with their POS and word alphabets, from disk."""
    logger.info("Loading models from disk.")

    models = {}

    models_to_load = ['auto', 'vanilla'] if model == 'all' else [model]

    for t in models_to_load:
        # Use a distinct name so the 'model' parameter is not shadowed.
        learner = BaseLearner()
        model_dir = get_model_directory(model_base, data_name, t)
        learner.load(model_dir)

        pos_alphabet = Alphabet('pos')
        word_alphabet = Alphabet('word')

        pos_alphabet.load(model_dir)
        word_alphabet.load(model_dir)

        models[t] = (learner, pos_alphabet, word_alphabet)

    logger.info("Loading done.")

    return models
Example #6
class Lookup:
    def __init__(self, config):
        """
        :return:
        """
        if config.word_vector == 'word2vec':
            logger.info("Loading word2vec from disk ...")
            self.model = Word2Vec.load_word2vec_format(config.word_vector_path, binary=True)
        print("Loading done...")

        self.full_alphabet = Alphabet("full_lookup")

    def initail_lookup(self, alphabet):
        """
        Initialize the lookup tables for the word vectors. This builds two numpy arrays of shape
        [vocabulary size, dimension], one row per word embedding: self.table, which covers only the
        given alphabet, and self.full_table, which also covers the rest of the word2vec vocabulary.
        :param alphabet: The alphabet that stores the words.
        """
        embeddings = []
        if Alphabet.default_index == 0:
            embeddings.append(uniform_embedding([1, self.model.vector_size]))
        else:
            raise ValueError("Default index is not the first one, you must change the implementation here.")

        # Add words from the given alphabet to the embedding list, and to the full alphabet.
        for w, index in alphabet.iteritems():
            if not self.full_alphabet.has_instance(w):
                embedding = self.model[w] if w in self.model else uniform_embedding([1, self.model.vector_size])
                embeddings.append(embedding)
                self.full_alphabet.add(w)

        # Store embeddings that appear in training data.
        self.table = np.vstack(embeddings)

        for w in self.model.vocab.keys():
            if not alphabet.has_instance(w):
                embedding = self.model[w]
                self.full_alphabet.add(w)
                embeddings.append(embedding)

        # Store embeddings of the full vocabulary.
        self.full_table = np.vstack(embeddings)

        logger.info("The training only embedding table contains %d embeddings, each with a dimension of size %d." % (
            self.table.shape[0], self.table.shape[1]))

        logger.info("The full embedding table contains %d embeddings, each with a dimension of size %d." % (
            self.full_table.shape[0], self.full_table.shape[1]))

    def load_additional_embeddings(self, original_alphabet, new_alphabet):
        """
        Create an additional lookup table for words that are not in the original alphabet.
        :param original_alphabet: The original alphabet.
        :param new_alphabet: The alphabet containing the additional words.
        :return: A numpy array of the additional embeddings, or None if there are none.
        """
        embeddings = []
        for w, index in new_alphabet.iteritems():
            if not original_alphabet.has_instance(w):
                embedding = self.model[w] if w in self.model else uniform_embedding([1, self.model.vector_size])
                embeddings.append(embedding)

        if len(embeddings) > 0:
            additional_table = np.vstack(embeddings)
            return additional_table
        else:
            return None
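
A usage sketch under stated assumptions: config exposes word_vector == 'word2vec' and a word_vector_path pointing at a word2vec binary (the pre-1.0 gensim API the class uses), word_alphabet comes from read_conll above, and test_word_alphabet is a hypothetical alphabet over unseen test words.

lookup = Lookup(config)
# Builds lookup.table (training vocabulary only) and lookup.full_table (full word2vec vocabulary).
lookup.initail_lookup(word_alphabet)

# Append rows for test words missing from the training alphabet.
extra = lookup.load_additional_embeddings(word_alphabet, test_word_alphabet)
if extra is not None:
    combined_table = np.vstack([lookup.table, extra])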