def cross_validation_ah():
    import random
    random.seed(1234567)

    # TensorFlow 1.x session setup: let the GPU allocate memory on demand
    import tensorflow
    from tensorflow.python.keras.backend import set_session

    sess_config = tensorflow.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    set_session(tensorflow.Session(config=sess_config))

    vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
    embeddings = WordEmbeddings.deserialize('en-top100k.embeddings.pkl.gz')

    reader = JSONPerLineDocumentReader(
        'data/experiments/ah-classification1/exported-3621-sampled-positive-negative-ah-no-context.json',
        True)
    # alternative classifiers (baselines and other architectures):
    # e = ClassificationExperiment(reader, RandomTokenizedDocumentClassifier(), ClassificationEvaluator())
    # e = ClassificationExperiment(reader, MajorityClassTokenizedDocumentClassifier(), ClassificationEvaluator())
    # e = ClassificationExperiment(reader, SimpleLSTMTokenizedDocumentClassifier(vocabulary, embeddings), ClassificationEvaluator())
    e = ClassificationExperiment(
        reader, StackedLSTMTokenizedDocumentClassifier(vocabulary, embeddings),
        ClassificationEvaluator())
    # e = ClassificationExperiment(reader, CNNTokenizedDocumentClassifier(vocabulary, embeddings), ClassificationEvaluator())
    e.run()
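
The ConfigProto/Session calls above are TensorFlow 1.x API; in TensorFlow 2.x they were moved into the compatibility namespace. A minimal sketch of the same GPU memory-growth setup under TF 2.x, assuming the 1.x-style session workflow is still wanted:

import tensorflow as tf

# ConfigProto and Session now live under tf.compat.v1
sess_config = tf.compat.v1.ConfigProto()
sess_config.gpu_options.allow_growth = True  # allocate GPU memory on demand
tf.compat.v1.keras.backend.set_session(
    tf.compat.v1.Session(config=sess_config))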
Code Example #2
def cross_validation_ah(model_type):
    # classification without context
    import random
    random.seed(1234567)

    import tensorflow as tf
    if tf.test.is_gpu_available():
        strategy = tf.distribute.MirroredStrategy()
        print('Using GPU')
    else:
        raise ValueError('CPU not recommended.')

    with strategy.scope():
        vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
        embeddings = WordEmbeddings.deserialize('en-top100k.embeddings.pkl.gz')
        reader = JSONPerLineDocumentReader(
            'data/experiments/ah-classification1/exported-3621-sampled-positive-negative-ah-no-context.json',
            True)
        if model_type == 'cnn':
            e = ClassificationExperiment(
                reader, CNNTokenizedDocumentClassifier(vocabulary, embeddings),
                ClassificationEvaluator())
        else:
            # any other model_type falls through to the stacked LSTM
            e = ClassificationExperiment(
                reader,
                StackedLSTMTokenizedDocumentClassifier(vocabulary, embeddings),
                ClassificationEvaluator())
        e.run()
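
A hypothetical driver (not in the original) for running both variants back to back; note that any model_type other than 'cnn' selects the stacked LSTM branch:

for model_type in ('cnn', 'lstm'):
    cross_validation_ah(model_type)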
Code Example #3
def train_test_model_with_context(train_dir, indir, outdir):
    '''Custom training and testing of the SSAE model.
    :param train_dir: path to a JSON file containing training examples
    :param indir: path to a LOG file containing examples as Comment() objects
                  (already classified by BERT)
    :param outdir: path to the LOG file to be created, with this model's
                   prediction added as well'''

    import pickle
    import random
    random.seed(1234567)

    import tensorflow as tf
    if tf.test.is_gpu_available():
        strategy = tf.distribute.MirroredStrategy()
        print('Using GPU')
    else:
        raise ValueError('CPU not recommended.')

    with strategy.scope():
        vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
        embeddings = WordEmbeddings.deserialize('en-top100k.embeddings.pkl.gz')
        reader = JSONPerLineDocumentReader(train_dir, True)
        e = ClassificationExperiment(
            reader,
            StructuredSelfAttentiveSentenceEmbedding(vocabulary, embeddings),
            ClassificationEvaluator())
        test_comments = TokenizedDocumentReader(indir)
        result = e.label_external(test_comments)

    for k in result.keys():
        print(f'{k}: {result[k]}')

    # read back every Comment() object pickled into the input LOG file
    instances = dict()
    with open(indir, 'rb') as f:
        try:
            while True:
                e = pickle.load(f)
                print(e)
                instances[str(e.id)] = e
        except EOFError:
            pass

    # attach this model's prediction to each instance and re-pickle;
    # model_type was undefined in the original snippet, so a fixed label
    # is assumed here
    model_type = 'ssae'
    with open(outdir, 'wb') as f:
        for key in result.keys():
            model_label, model_score = result[key]
            model_label = model_label.lower()
            # score of the predicted class: index 1 is 'ah', index 0 is 'none'
            score = model_score[1]
            if model_label == 'none':
                score = model_score[0]
            instances[key].add_model(model_type, model_label, score, None)
            e = instances[key]
            print(e)
            print(e.labels)
            print(e.scores)
            print('=' * 20)
            pickle.dump(instances[key], f)
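
The LOG files above are just back-to-back pickled Comment objects. A small helper sketch (name hypothetical) that mirrors the EOFError-terminated loop as a generator:

import pickle

def read_comment_log(path):
    # yield Comment objects until the pickle stream is exhausted
    with open(path, 'rb') as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                return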
Code Example #4
    def __init__(self):
        self.vocabulary = Vocabulary.deserialize(
            'en-top100k.vocabulary.pkl.gz')
        self.embeddings = WordEmbeddings.deserialize(
            'en-top100k.embeddings.pkl.gz')

        assert isinstance(self.vocabulary, Vocabulary)
        assert isinstance(self.embeddings, WordEmbeddings)

        # for caching computed average word vectors (it's expensive)
        # dictionary = (str, np.ndarray)
        # key = text, value = average word vector
        self._average_word_vector_cache = dict()
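
A sketch of the memoized accessor that the cache above suggests; both the method and the word_vector() call on WordEmbeddings are hypothetical names, not part of the original class:

    def _average_word_vector(self, text):
        import numpy as np
        # compute once per text, then serve from the cache
        if text not in self._average_word_vector_cache:
            vectors = [self.embeddings.word_vector(t) for t in text.split()]
            self._average_word_vector_cache[text] = np.mean(vectors, axis=0)
        return self._average_word_vector_cache[text]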
Code Example #5

def cross_validation_thread_ah_delta_context3():
    import random
    random.seed(1234567)

    # TensorFlow 1.x session setup: let the GPU allocate memory on demand
    import tensorflow
    from tensorflow.python.keras.backend import set_session

    sess_config = tensorflow.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    set_session(tensorflow.Session(config=sess_config))

    vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
    embeddings = WordEmbeddings.deserialize('en-top100k.embeddings.pkl.gz')

    reader = AHVersusDeltaThreadReader(
        'data/sampled-threads-ah-delta-context3', True)
    e = ClassificationExperiment(
        reader,
        StructuredSelfAttentiveSentenceEmbedding(
            vocabulary, embeddings, '/tmp/visualization-context3'),
        ClassificationEvaluator())

    e.run()
Code Example #6
def cross_validation_thread_ah_delta_context3():
    # classification with context
    import random
    random.seed(1234567)

    import tensorflow as tf
    if tf.test.is_gpu_available():
        strategy = tf.distribute.MirroredStrategy()
        print('Using GPU')
    else:
        raise ValueError('CPU not recommended.')

    with strategy.scope():
        vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
        embeddings = WordEmbeddings.deserialize('en-top100k.embeddings.pkl.gz')
        reader = AHVersusDeltaThreadReader(
            'data/sampled-threads-ah-delta-context3', True)
        e = ClassificationExperiment(
            reader,
            StructuredSelfAttentiveSentenceEmbedding(
                vocabulary, embeddings, '/tmp/visualization-context3'),
            ClassificationEvaluator())
        e.run()
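
MirroredStrategy as used above replicates across all visible GPUs. A one-line sketch of pinning it to a specific device instead:

import tensorflow as tf

# restrict the strategy to the first GPU rather than all visible devices
strategy = tf.distribute.MirroredStrategy(devices=['/gpu:0'])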
Code Example #7
    def doc_to_html_spans(self, doc):
        # (method header reconstructed; the original snippet began
        # mid-method and the name is hypothetical)
        tokens = doc['words']
        weights = doc['weights']
        doc_id = doc['id']

        string_buffer = []

        for i in range(len(tokens)):
            print(i, tokens[i], weights[i])
            # surround sentence-start markers with line breaks
            if '___start__' in tokens[i]:
                string_buffer.append(' <br />')
            # attention weight is stored on the span as a percentage
            string_buffer.append(
                f'<span data-weight="{100 * float(weights[i])}">{tokens[i]}</span>')
            if '___start__' in tokens[i]:
                string_buffer.append(' <br />')

        print("-----------")
        return (f'<div class="answer"><h1>ID: {doc_id}</h1>'
                + ''.join(string_buffer) + '</div>')
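
A hedged sketch (not part of the original code) of assembling the returned divs into a standalone page; the inline script maps each span's data-weight, stored above as 100 * attention weight, back to an alpha value for a red highlight:

def write_html_report(divs, path):
    # client-side script: convert data-weight back to a background alpha
    script = ('<script>document.querySelectorAll("span[data-weight]")'
              '.forEach(function (s) { s.style.background = '
              '"rgba(255,0,0," + parseFloat(s.dataset.weight) / 100 + ")"; '
              '});</script>')
    with open(path, 'w', encoding='utf-8') as f:
        f.write('<html><body>' + ''.join(divs) + script + '</body></html>')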


if __name__ == "__main__":
    token_counter = 0  # presumably updated as a global inside print_fold_latex
    vocabulary = Vocabulary.deserialize('en-top100k.vocabulary.pkl.gz')
    Visualization().print_fold_latex("visualization-context3/fold1.json",
                                     "/tmp/temp1.tex", vocabulary)
    Visualization().print_fold_latex("visualization-context3/fold2.json",
                                     "/tmp/temp2.tex", vocabulary)
    print(token_counter)