Python Txt2JSON.__init__ Examples

Programming Language: Python

Namespace/Package Name: txt2json

Class/Type: Txt2JSON

Method/Function: __init__

Examples at hotexamples.com: 10

Python Txt2JSON.__init__ - 10 examples found. These are the top rated real world Python examples of txt2json.Txt2JSON.__init__ extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

__init__(10)

process_corpus(4)

Frequently Used Methods

__init__ (10)

process_corpus (4)

Example #1

Show file

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation and create the converter state.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # self.corpusSettings is presumably populated by the base initialiser.
     self.mc = MediaCutter(settings=self.corpusSettings)
     # Containers that start out empty.
     self.glosses = set()
     self.tlis = dict()  # time label id -> {'n': number, 'time': time value}
     self.pID = 0  # id of the most recently aligned segment
     # Source files with this extension are the ones to be converted.
     self.srcExt = 'exb'

Example #2

Show file

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation, then compile the two
     punctuation regexes taken from the corpus settings.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     settings = self.corpusSettings
     # Both patterns are read from the settings and compiled once here.
     self.rxPuncSpaceBefore = re.compile(settings['punc_space_before'])
     self.rxPuncSpaceAfter = re.compile(settings['punc_space_after'])
     self.pID = 0  # id of the most recently aligned segment
     self.srcExt = 'yaml'

Example #3

Show file

File: xml_flex2json.py Project: mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation, create the empty gloss and
     rule containers, and call load_rules().

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.srcExt = 'xml'
     self.pID = 0        # id of last aligned segment
     self.glosses = []
     self.grammRules = []
     self.posRules = {}
     # NOTE(review): load_rules() is defined elsewhere; presumably it fills
     # the rule containers created above -- confirm before reordering.
     self.load_rules()
     self.POSTags = set()    # All POS tags encountered in the XML
     # Starts out as the pattern '^$'.
     self.rxStemGlosses = re.compile('^$')

Example #4

Show file

File: eaf2json.py Project: LingConLab/Bashkir_corpus

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation, load the speaker metadata,
     create a MediaCutter and set up the empty lookup tables.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.speakerMeta = self.load_speaker_meta()
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'eaf'
     self.pID = 0  # id of the most recently aligned segment
     self.glosses = set()
     # Lookup tables; every one of them starts out empty.
     self.tlis = dict()  # time labels
     self.participants = dict()  # main tier ID -> participant ID
     self.segmentTree = dict()  # aID -> (contents, parent aID, tli1, tli2)
     self.segmentChildren = dict()  # (aID, child tier type) -> [child aID]

Example #5

Show file

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation, create a MediaCutter and the
     empty bookkeeping containers, then call load_pos_rules() on
     conf/posRules.txt inside the corpus directory.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'xml'  # extension of the source files to be converted
     self.participants = {}   # participant ID -> dictionary of properties
     self.tlis = {}       # time labels (id -> {'n': number, 'time': time value})
     self.wordsByID = {}  # word ID -> word object
     self.morph2wordID = {}   # morph ID -> (word ID, position in the word)
     self.pID = 0         # id of last aligned segment
     self.seg2pID = {}    # ids of <seg> tags -> parallel IDs of corresponding sentences
     self.wordIDseq = []  # sequence of word/punctuation/incident IDs
                          # (needed to understand ranges such as "w13 to inc2")
     self.glosses = set()
     self.posRules = {}
     # The path is built from the 'corpus_dir' setting.
     # NOTE(review): load_pos_rules() is defined elsewhere; presumably it
     # fills self.posRules -- confirm.
     self.load_pos_rules(os.path.join(self.corpusSettings['corpus_dir'], 'conf/posRules.txt'))

Example #6

Show file

 def __init__(self, settingsDir='conf_conversion'):
     """
     Run the base Txt2JSON initialisation, create the gloss and rule
     containers, call load_rules(), and read the optional gloss-language
     settings.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.pID = 0  # id of the most recently aligned segment
     self.srcExt = 'xml'
     # These containers are created before load_rules() is invoked.
     self.posRules = {}
     self.grammRules = []
     self.glosses = []
     self.load_rules()
     self.rxStemGlosses = re.compile('^$')
     self.POSTags = set()  # every POS tag encountered in the XML

     settings = self.corpusSettings
     # Main gloss language: taken from the settings if given, else 'en'.
     if 'main_gloss_language' in settings:
         self.mainGlossLang = settings['main_gloss_language']
     else:
         self.mainGlossLang = 'en'
     # Bad-analysis languages: taken from the settings if given, else empty.
     if 'bad_analysis_languages' in settings:
         self.badAnalysisLangs = settings['bad_analysis_languages']
     else:
         self.badAnalysisLangs = []

Example #7

Show file

File: eaf2json.py Project: timarkh/tsakorpus_wc

 def __init__(self, settingsDir='conf_conversion'):
     """
     Run the base Txt2JSON initialisation, load the speaker metadata,
     create a MediaCutter, set up the empty bookkeeping containers and
     call set_ignore_tokens().

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     self.speakerMeta = self.load_speaker_meta()
     self.mc = MediaCutter(settings=self.corpusSettings)
     self.srcExt = 'eaf'
     self.tlis = {}  # time labels
     self.pID = 0  # id of last aligned segment
     self.glosses = set()
     self.participants = {}  # main tier ID -> participant ID
     self.segmentTree = {}  # aID -> (contents, parent aID, tli1, tli2)
     self.segmentChildren = {}  # (aID, child tier type) -> [child aID]
     self.spanAnnoTiers = {}  # span annotation tier type -> {tier ID -> [(tli1, tli2, contents)]}
     self.alignedSpanAnnoTiers = {}  # aID of a segment -> {span annotation tier ID -> contents}
     self.additionalWordFields = []  # names of additional word-level fields associated with some analysis tiers
     self.privacySegments = {}  # segments (start_ms, end_ms) that should be beeped out, one list per source file
     # Set to None first, then set_ignore_tokens() is called.
     # NOTE(review): set_ignore_tokens() is defined elsewhere; presumably it
     # replaces rxIgnoreTokens with a compiled pattern -- confirm.
     self.rxIgnoreTokens = None
     self.set_ignore_tokens()
     self.usedMediaFiles = set()  # filenames of media fragments referenced in the JSONs

Example #8

Show file

File: img_csv2json.py Project: mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation and set the CSV-specific state.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # Counter holding the id of the most recently aligned segment.
     self.pID = 0
     self.srcExt = 'csv'

Example #9

Show file

File: social_networks2json.py Project: mansi-team/mansi_corpus

 def __init__(self, settingsDir='conf'):
     """
     Run the base Txt2JSON initialisation and set the JSON-specific state.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # Gloss collection; starts out empty.
     self.glosses = set()
     self.srcExt = 'json'

Example #10

Show file

 def __init__(self, settingsDir='conf_conversion'):
     """
     Run the base Txt2JSON initialisation and set the XML-specific state.

     settingsDir is forwarded unchanged to Txt2JSON.__init__.
     """
     Txt2JSON.__init__(self, settingsDir=settingsDir)
     # Counter holding the id of the most recently aligned segment.
     self.pID = 0
     self.srcExt = 'xml'

Python Txt2JSON.__init__ Examples

Python Txt2JSON.__init__ Examples