コード例 #1
0
def move_and_parse(indir='xml-form'):
    """
    Use corpkit/CoreNLP to parse the corpus.

    Makes a new corpkit project called ``rsc-proj``, copies the ``indir``
    directory tree to ``rsc-proj/data``, changes the working directory
    into the project and parses the corpus named ``rsc-form``.

    Args:
        indir: Path of the directory tree to copy into the project's
            data directory. Defaults to ``'xml-form'``.

    Returns:
        The value returned by ``Corpus.parse`` (presumably the parsed
        corpus -- confirm against the corpkit documentation).

    Note:
        The process's working directory is left as ``rsc-proj`` when
        this function returns.
    """
    import shutil
    import os
    from corpkit import Corpus, new_project

    # make a new project and move into it
    new_project('rsc-proj')
    # Copy from the caller-supplied directory; previously the source path
    # was hard-coded and the ``indir`` argument was silently ignored.
    # NOTE(review): copytree raises FileExistsError if 'rsc-proj/data'
    # already exists -- confirm new_project() does not create it.
    shutil.copytree(indir, 'rsc-proj/data')
    os.chdir('rsc-proj')

    # NOTE(review): the corpus name 'rsc-form' does not match the copied
    # directory name -- verify this is the intended corpus.
    corpus = Corpus('rsc-form')
    parsed = corpus.parse(metadata=True,
                          speaker_segmentation=False,
                          multiprocess=15)
    # Hand the result back instead of discarding it.
    return parsed
コード例 #2
0
import corpkit
from corpkit import Corpus
# Directory handed to corpkit as the corpus to parse.
_corpus_dir = 'C:\\Users\\jbjb\\Documents\\DATA\\weird corpus\\corpkit\\explit\\data'

# Build the Corpus object for that directory and run its parser.
unparsed = Corpus(_corpus_dir)
unparsed.parse()