コード例 #1
0
ファイル: pipeline.py プロジェクト: tomaye/Thesis
    def load_corpus(self, name, files, min=15, max=100, merge=False):
        """
        Load one or more files into a single corpus and register it under ``name``.

        The first file creates the CorpusLoader and every further file is added
        to it. The loader is then optionally merged, tagged with ``name``,
        tokenized and passed through ``self.tax.expandTax``; the result is
        stored in ``self.corpora``.

        :param name: key for the entry in self.corpora; also appended to the
            loader's ``containing`` list
        :param files: list of files; must contain at least one entry
        :param min: minimum sentence length, forwarded to the loader
        :param max: maximum sentence length, forwarded to the loader
        :param merge: if true, ``mergeData()`` is called on the loader
            (one text element instead of two)
        :return: None
        """
        first_file = files[0]
        loader = CorpusLoader(first_file, min, max)

        # Every file after the first is added to the same loader.
        for extra_file in files[1:]:
            loader.add_Corpus(extra_file, min, max)

        if merge:
            loader.mergeData()

        loader.containing.append(name)
        loader.tokenize()

        self.corpora[name] = self.tax.expandTax(loader)
コード例 #2
0
    def load_corpus(self, name, files, min=15, max=100, merge=False):
        """
        Build a corpus from ``files`` and store it in ``self.corpora[name]``.

        A CorpusLoader is created from the first file, the remaining files are
        added to it, and the loader is merged on request. It is then tagged
        with ``name``, tokenized, and handed to ``self.tax.expandTax``, whose
        return value is stored as the corpus.

        :param name: key for the dictionary entry in self.corpora
        :param files: list of files (at least one is required)
        :param min: minimum length of sentences, passed on to the loader
        :param max: maximum length of sentences, passed on to the loader
        :param merge: one or two text elements; one if true
            (``mergeData()`` is called)
        :return: None
        """
        head, tail = files[0], files[1:]

        corpus_loader = CorpusLoader(head, min, max)
        for additional in tail:
            corpus_loader.add_Corpus(additional, min, max)

        if merge:
            corpus_loader.mergeData()

        # Tag the loader with its corpus name before tokenizing.
        corpus_loader.containing.append(name)
        corpus_loader.tokenize()

        expanded = self.tax.expandTax(corpus_loader)
        self.corpora[name] = expanded
コード例 #3
0
ファイル: runClassifier.py プロジェクト: tomaye/Thesis
def load_corpus(name, files, merge=True):
    """
    Load the given files into one CorpusLoader and store it in ``corpora[name]``.

    :param name: key under which the loader is stored in the module-level
        ``corpora`` mapping; also used in the progress message
    :param files: list of files; the first creates the loader, all further
        ones are added to it (at least one file is required)
    :param merge: if true, ``mergeData()`` is called on the loader
    :return: None
    """
    # NOTE(review): ``min`` and ``max`` are not parameters of this function.
    # Unless the module defines globals with these names, the builtin
    # functions are what gets passed here -- confirm against the rest of the
    # file.
    CL = CorpusLoader(files[0], min, max)

    # Add every remaining file instead of hard-coding files[1]: a single-file
    # list no longer raises IndexError, and files beyond the second are no
    # longer silently dropped.
    for extra_file in files[1:]:
        CL.add_Corpus(extra_file, min, max)

    if merge:
        CL.mergeData()

    corpora[name] = CL

    print(name + " loaded...")
コード例 #4
0
def load_corpus(name, files, merge=True):
    """
    Create a CorpusLoader from ``files`` and register it as ``corpora[name]``.

    :param name: key for the entry in the module-level ``corpora`` mapping;
        also printed in the progress message
    :param files: list of files (at least one); the loader is created from the
        first and each following file is added to it
    :param merge: if true, ``mergeData()`` is called on the loader
    :return: None
    """
    # NOTE(review): ``min`` and ``max`` are not defined in this function; if
    # the module has no globals of these names, the builtins are passed to the
    # loader -- verify against the surrounding module.
    CL = CorpusLoader(files[0], min, max)

    # Iterate over all remaining files rather than indexing files[1] directly,
    # so one-file lists work and lists longer than two are fully loaded.
    for extra_file in files[1:]:
        CL.add_Corpus(extra_file, min, max)

    if merge:
        CL.mergeData()

    corpora[name] = CL

    print(name + " loaded...")