Python SequenceCorpus.cell_numの例

プログラミング言語: Python

名前空間/パッケージ名: neural_machine.tasks.language.common.corpus.sequence_corpus

クラス/型: SequenceCorpus

メソッド/関数: cell_num

hotexamples.comのコード掲載数: 4

Python SequenceCorpus.cell_num - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのneural_machine.tasks.language.common.corpus.sequence_corpus.SequenceCorpus.cell_numの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

SequenceCorpus(2)

cell_num(2)

build(1)

clone(1)

make(1)

predict(1)

update(1)

コード例 #1

ファイルを表示

ファイル: language_model.py プロジェクト: rudaoshi/neural_machine

from neural_machine.tasks.language.common.data_reader.bucket_iter import *

import sys

import logging

if __name__ == '__main__':
    # Script entry point.
    #   sys.argv[1]: path of the training text file
    #   sys.argv[2]: path of the validation text file

    # Log lines carry a timestamp followed by the message, at DEBUG verbosity.
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)

    segmenter = SpaceSegmenter()
    corpus = SequenceCorpus()

    # Use a context manager so the training file is closed as soon as the
    # corpus has been built (the handle used to be leaked).
    # NOTE(review): assumes build() reads the whole file before returning,
    # as SequencePairCorpus.build does -- confirm for SequenceCorpus.
    with open(sys.argv[1], 'r') as train_file:
        corpus.build(train_file, segmenter)
    cell_num = corpus.cell_num()

    problem = LanguageModelProblem(corpus)

    batch_size = 32

    data_train = BucketIter(problem, batch_size)

    # The validation corpus is derived from the training corpus via make().
    # NOTE(review): same eager-read assumption as above applies to make().
    with open(sys.argv[2], 'r') as val_file:
        val_corpus = corpus.make(val_file, segmenter)
    val_problem = LanguageModelProblem(val_corpus)
    data_val = BucketIter(val_problem, batch_size)

    arch_param = LanguageModelArchParam(num_hidden=200,
                                        num_embed=200,
                                        num_lstm_layer=2,
                                        cell_num=corpus.cell_num())

コード例 #2

ファイルを表示

ファイル: sequence_pair_corpus.py プロジェクト: rudaoshi/neuralmachines

class SequencePairCorpus(object):
    """Corpus of (source, target) sequence pairs read from tab-separated lines.

    Each side is encoded by its own ``SequenceCorpus``; the encoded pairs are
    accumulated in ``self.corpus`` as ``(src, target)`` tuples.
    """

    def __init__(self,
                 source_with_start=False, source_with_end = False, source_with_unk = False,
                 target_with_start=False, target_with_end=False, target_with_unk=False,
                 same_length = False
                 ):
        """Create an empty pair corpus.

        The ``*_with_start`` / ``*_with_end`` / ``*_with_unk`` flags are stored
        and forwarded positionally to the source and target ``SequenceCorpus``
        constructors.  When ``same_length`` is true, pairs whose two raw text
        fields differ in length are rejected while reading.
        """
        self.source_with_start = source_with_start
        self.source_with_end = source_with_end
        self.source_with_unk = source_with_unk

        self.target_with_start = target_with_start
        self.target_with_end = target_with_end
        self.target_with_unk = target_with_unk

        self.source_corpus = SequenceCorpus(source_with_start, source_with_end, source_with_unk)
        self.target_corpus = SequenceCorpus(target_with_start, target_with_end, target_with_unk)
        self.same_length = same_length

        # List of (src, target) tuples in the order they were read.
        self.corpus = []

    def _read_pairs(self, data_file):
        """Yield ``(src_seq, tgt_seq)`` raw strings for every usable line.

        Blank lines are skipped silently.  Lines that do not split into exactly
        two tab-separated fields, or that violate ``same_length``, are logged
        with ``logging.error`` and skipped.
        """
        for line in data_file:
            line = line.strip()
            if not line:
                continue

            try:
                src_seq, tgt_seq = line.split('\t')
            except ValueError:
                # Unpacking fails when the line has fewer or more than two
                # fields.  Only ValueError is caught so that KeyboardInterrupt
                # and other unrelated errors are no longer swallowed.
                logging.error("no sequence pair found in sentence : {0} ".format(json.dumps(line)))
                continue

            # NOTE: the comparison is on the raw strings (character counts),
            # made before any segmenter is applied.
            if self.same_length and len(src_seq) != len(tgt_seq):
                logging.error("src and tgt seq not in same length {0} {1} {2}".format(len(src_seq), len(tgt_seq), json.dumps(line)))
                continue

            yield src_seq, tgt_seq

    def build(self, data_file, source_segmenter, target_segmenter):
        """Read ``data_file`` and append its encoded pairs to ``self.corpus``.

        ``data_file`` is any iterable of text lines.  Each side is encoded with
        the ``update`` method of the matching ``SequenceCorpus``.
        """
        for src_seq, tgt_seq in self._read_pairs(data_file):
            src = self.source_corpus.update(src_seq, source_segmenter)
            target = self.target_corpus.update(tgt_seq, target_segmenter)

            self.corpus.append((src, target))

    def make(self, data_file, source_segmenter, target_segmenter):
        """Return a new ``SequencePairCorpus`` holding the pairs of ``data_file``.

        The new corpus is created with this instance's flags and receives
        clones of this instance's source and target corpora.  Each side is
        encoded with ``predict``.
        """
        corpus = SequencePairCorpus(
                 self.source_with_start, self.source_with_end , self.source_with_unk ,
                 self.target_with_start, self.target_with_end, self.target_with_unk,
                 self.same_length
                 )

        corpus.source_corpus = self.source_corpus.clone()
        corpus.target_corpus = self.target_corpus.clone()

        for src_seq, tgt_seq in self._read_pairs(data_file):
            # NOTE(review): predict() is called on this instance's corpora, not
            # on the clones handed to the new corpus -- confirm this is intended.
            src = self.source_corpus.predict(src_seq, source_segmenter)
            target = self.target_corpus.predict(tgt_seq, target_segmenter)

            corpus.corpus.append((src, target))

        return corpus

    def source_cell_num(self):
        """Return ``cell_num()`` of the source-side corpus."""
        return self.source_corpus.cell_num()

    def target_cell_num(self):
        """Return ``cell_num()`` of the target-side corpus."""
        return self.target_corpus.cell_num()

    def corpus_size(self):
        """Return the number of stored (source, target) pairs."""
        return len(self.corpus)

コード例 #3

ファイルを表示

ファイル: language_model.py プロジェクト: rudaoshi/neuralmachines

import sys

import logging

if __name__ == '__main__':

    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)


    segmenter = SpaceSegmenter()
    corpus = SequenceCorpus()

    corpus.build(open(sys.argv[1], 'r'), segmenter)
    cell_num = corpus.cell_num()

    problem = LanguageModelProblem(corpus)

    batch_size = 32

    data_train = BucketIter(problem, batch_size)

    val_corpus = corpus.make(open(sys.argv[2], 'r'), segmenter)
    val_problem = LanguageModelProblem(val_corpus)
    data_val = BucketIter(val_problem, batch_size)


    arch_param = LanguageModelArchParam(
        num_hidden= 200,
        num_embed= 200,

コード例 #4

ファイルを表示

class SequencePairCorpus(object):
    """Paired source/target corpus built from tab-separated text lines.

    Two ``SequenceCorpus`` objects encode the source and target sides; the
    encoded ``(src, target)`` tuples are kept in ``self.corpus``.
    """

    def __init__(self,
                 source_with_start=False,
                 source_with_end=False,
                 source_with_unk=False,
                 target_with_start=False,
                 target_with_end=False,
                 target_with_unk=False,
                 same_length=False):
        """Store the configuration flags and create empty per-side corpora.

        The three ``source_*`` flags and the three ``target_*`` flags are
        passed positionally to the respective ``SequenceCorpus`` constructor.
        ``same_length`` makes reading reject pairs whose raw text fields have
        different lengths.
        """
        self.source_with_start = source_with_start
        self.source_with_end = source_with_end
        self.source_with_unk = source_with_unk

        self.target_with_start = target_with_start
        self.target_with_end = target_with_end
        self.target_with_unk = target_with_unk

        self.source_corpus = SequenceCorpus(source_with_start, source_with_end,
                                            source_with_unk)
        self.target_corpus = SequenceCorpus(target_with_start, target_with_end,
                                            target_with_unk)
        self.same_length = same_length

        # Encoded (src, target) tuples, in reading order.
        self.corpus = []

    def _parse_line(self, line):
        """Split one input line into ``(src_seq, tgt_seq)`` or return ``None``.

        ``None`` is returned for blank lines (silently) and, after an
        ``logging.error`` message, for lines that do not contain exactly two
        tab-separated fields or that fail the ``same_length`` check.
        """
        line = line.strip()
        if not line:
            return None

        try:
            src_seq, tgt_seq = line.split('\t')
        except ValueError:
            # Wrong number of tab-separated fields.  Catching ValueError only
            # (instead of a bare ``except``) lets KeyboardInterrupt and
            # unrelated errors propagate.
            logging.error(
                "no sequence pair found in sentence : {0} ".format(
                    json.dumps(line)))
            return None

        # NOTE: lengths are those of the raw strings, before segmentation.
        if self.same_length and len(src_seq) != len(tgt_seq):
            logging.error(
                "src and tgt seq not in same length {0} {1} {2}".format(
                    len(src_seq), len(tgt_seq), json.dumps(line)))
            return None

        return src_seq, tgt_seq

    def build(self, data_file, source_segmenter, target_segmenter):
        """Encode every valid line of ``data_file`` into ``self.corpus``.

        ``data_file`` is an iterable of text lines.  The source and target
        fields are encoded with ``update`` on the corresponding corpus.
        """
        for line in data_file:
            pair = self._parse_line(line)
            if pair is None:
                continue
            src_seq, tgt_seq = pair

            src = self.source_corpus.update(src_seq, source_segmenter)
            target = self.target_corpus.update(tgt_seq, target_segmenter)

            self.corpus.append((src, target))

    def make(self, data_file, source_segmenter, target_segmenter):
        """Build and return a separate corpus from ``data_file``.

        The returned ``SequencePairCorpus`` copies this instance's flags and
        holds clones of its source and target corpora.  Fields are encoded
        with ``predict``.
        """
        corpus = SequencePairCorpus(self.source_with_start,
                                    self.source_with_end, self.source_with_unk,
                                    self.target_with_start,
                                    self.target_with_end, self.target_with_unk,
                                    self.same_length)

        corpus.source_corpus = self.source_corpus.clone()
        corpus.target_corpus = self.target_corpus.clone()

        for line in data_file:
            pair = self._parse_line(line)
            if pair is None:
                continue
            src_seq, tgt_seq = pair

            # NOTE(review): predict() runs on this instance's corpora rather
            # than on the clones given to the new corpus -- confirm intended.
            src = self.source_corpus.predict(src_seq, source_segmenter)
            target = self.target_corpus.predict(tgt_seq, target_segmenter)

            corpus.corpus.append((src, target))

        return corpus

    def source_cell_num(self):
        """Return the source corpus's ``cell_num()``."""
        return self.source_corpus.cell_num()

    def target_cell_num(self):
        """Return the target corpus's ``cell_num()``."""
        return self.target_corpus.cell_num()

    def corpus_size(self):
        """Return how many (source, target) pairs are stored."""
        return len(self.corpus)