Python Txt2JSON.process_corpus Examples

Programming Language: Python

Namespace/Package Name: txt2json

Class/Type: Txt2JSON

Method/Function: process_corpus

Examples at hotexamples.com: 4

Python Txt2JSON.process_corpus - 4 examples found. These are the top-rated real-world Python examples of txt2json.Txt2JSON.process_corpus extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

__init__(10)

process_corpus(4)

Frequently Used Methods

__init__ (10)

process_corpus (4)

Example #1

Show file

File: eaf2json.py Project: timarkh/tsakorpus_wc

 def process_corpus(self, cutMedia=True):
     """
     Convert every eaf file found under the source directory into a
     parsed json file in the target directory. Unless cutMedia is
     false, afterwards walk the media directory and pass each file
     whose extension is in self.mediaExtensions to the media cutter.
     """
     Txt2JSON.process_corpus(self)
     if not cutMedia:
         return

     settings = self.corpusSettings
     defaultDir = os.path.join(settings['corpus_dir'], self.srcExt)
     # An explicit 'media_dir' setting overrides the default location.
     mediaDir = settings['media_dir'] if 'media_dir' in settings else defaultDir

     def cuttingPriority(name):
         # 0: .avi/.mts/.mov, 1: .mp4, 2: everything else.
         lowered = name.lower()
         if lowered.endswith(('.avi', '.mts', '.mov')):
             return 0
         if lowered.endswith('.mp4'):
             return 1
         return 2

     for root, _, names in os.walk(mediaDir):
         # Stable sort: video files come first, and the directory
         # listing order is kept inside each priority group.
         for name in sorted(names, key=cuttingPriority):
             extension = os.path.splitext(name.lower())[1]
             if extension not in self.mediaExtensions:
                 continue
             # Privacy segments are looked up by the bare file name.
             segments = self.privacySegments[name] if name in self.privacySegments else []
             fullPath = os.path.abspath(os.path.join(root, name))
             print('Cutting media file', fullPath)
             self.mc.cut_media(fullPath,
                               usedFilenames=self.usedMediaFiles,
                               privacySegments=segments)

Example #2

Show file

File: eaf2json.py Project: LingConLab/Bashkir_corpus

 def process_corpus(self, cutMedia=False):
     """
     Convert every eaf file found under the source directory into a
     parsed json file in the target directory. When cutMedia is true,
     additionally walk '../<srcExt>' and pass each file whose
     extension is in self.mediaExtensions to the media cutter.
     """
     Txt2JSON.process_corpus(self)
     if not cutMedia:
         return

     mediaRoot = os.path.join('..', self.srcExt)
     for root, _, names in os.walk(mediaRoot):
         # Lazy filter: each file is checked right before it is cut.
         mediaNames = (
             name for name in names
             if os.path.splitext(name.lower())[1] in self.mediaExtensions
         )
         for name in mediaNames:
             mediaPath = os.path.abspath(os.path.join(root, name))
             print('Cutting media file', mediaPath)
             self.mc.cut_media(mediaPath)

Example #3

Show file

 def process_corpus(self):
     """
     Main entry point of the class.
     Convert every Exmaralda file found under the source directory
     into a parsed json file in the target directory, then walk
     <corpus_dir>/<srcExt> and pass each file whose extension is in
     self.mediaExtensions to the media cutter (self.mc.cut_media).
     """
     Txt2JSON.process_corpus(self)
     sourceDir = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
     for root, _subdirs, names in os.walk(sourceDir):
         for name in names:
             # Extension matching is case-insensitive.
             _, extension = os.path.splitext(name.lower())
             if extension not in self.mediaExtensions:
                 continue
             mediaPath = os.path.abspath(os.path.join(root, name))
             print('Cutting media file', mediaPath)
             self.mc.cut_media(mediaPath)

Example #4

Show file

File: eaf2json.py Project: gisly/evenki-corpus

    def process_corpus(self, cutMedia=True, filenames=None, isToProcessCorpus=True):
        """
        Take every eaf file from the source directory subtree, turn it
        into a parsed json and store it in the target directory, then
        cut the media files found under <corpus_dir>/<srcExt>.

        cutMedia: when false, return right after the (optional) json
            conversion without touching any media file.
        filenames: optional collection of bare file names; when it is
            non-empty, only files whose name is in it are considered.
        isToProcessCorpus: when false, skip the json conversion and
            only do the media cutting.
        """
        if isToProcessCorpus:
            Txt2JSON.process_corpus(self)
        if not cutMedia:
            return
        # Build the media directory path once; it is both reported and walked.
        mediaDir = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
        print(mediaDir)
        for path, dirs, files in os.walk(mediaDir):
            for fname in files:
                # An empty or missing filter means "process everything".
                if filenames and fname not in filenames:
                    continue
                # Extension matching is case-insensitive.
                fileExt = os.path.splitext(fname.lower())[1]
                if fileExt in self.mediaExtensions:
                    fname = os.path.abspath(os.path.join(path, fname))
                    print('Cutting media file', fname)
                    self.mc.cut_media(fname)