def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while converting .exb source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    self.corpusSettings is read right afterwards, so it is assumed
    to be populated by that call.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    # The media cutter is configured from the same corpus settings.
    self.mc = MediaCutter(settings=self.corpusSettings)

    # Extension of the source files to be converted.
    self.srcExt = 'exb'
    # ID of the last aligned segment.
    self.pID = 0
    # Time labels: id -> {'n': number, 'time': time value}.
    self.tlis = {}
    self.glosses = set()
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while processing 'yaml' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    The 'punc_space_before' and 'punc_space_after' entries of
    self.corpusSettings (assumed to be populated by that call) are
    compiled into regular expressions; a missing entry raises
    KeyError.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    settings = self.corpusSettings
    beforePattern = settings['punc_space_before']
    self.rxPuncSpaceBefore = re.compile(beforePattern)
    afterPattern = settings['punc_space_after']
    self.rxPuncSpaceAfter = re.compile(afterPattern)

    self.srcExt = 'yaml'
    # ID of the last aligned segment.
    self.pID = 0
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while processing 'xml' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    The rule containers are created empty before self.load_rules()
    is called; the remaining attributes are (re)set after it.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.srcExt = 'xml'
    # ID of the last aligned segment.
    self.pID = 0

    # Empty containers that exist before load_rules() runs.
    self.posRules = {}
    self.grammRules = []
    self.glosses = []
    self.load_rules()

    # All POS tags encountered in the XML.
    self.POSTags = set()
    # Initial pattern: matches only an empty string.
    self.rxStemGlosses = re.compile('^$')
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while processing 'eaf' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    Speaker metadata is then loaded with self.load_speaker_meta()
    and a MediaCutter is built from self.corpusSettings (assumed to
    be populated by the Txt2JSON initializer).
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.speakerMeta = self.load_speaker_meta()
    self.mc = MediaCutter(settings=self.corpusSettings)

    self.srcExt = 'eaf'
    # ID of the last aligned segment.
    self.pID = 0
    self.glosses = set()
    # Time labels.
    self.tlis = {}
    # Main tier ID -> participant ID.
    self.participants = {}
    # (aID, child tier type) -> [child aID].
    self.segmentChildren = {}
    # aID -> (contents, parent aID, tli1, tli2).
    self.segmentTree = {}
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while converting .xml source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    A MediaCutter is built from self.corpusSettings (assumed to be
    populated by that call), and POS rules are loaded last from
    conf/posRules.txt under the 'corpus_dir' setting.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.mc = MediaCutter(settings=self.corpusSettings)

    # Extension of the source files to be converted.
    self.srcExt = 'xml'
    # ID of the last aligned segment.
    self.pID = 0
    self.glosses = set()

    # Participant ID -> dictionary of properties.
    self.participants = {}
    # Time labels: id -> {'n': number, 'time': time value}.
    self.tlis = {}

    # Word ID -> word object.
    self.wordsByID = {}
    # Morph ID -> (word ID, position in the word).
    self.morph2wordID = {}
    # Sequence of word/punctuation/incident IDs
    # (needed to understand ranges such as "w13 to inc2").
    self.wordIDseq = []
    # IDs of <seg> tags -> parallel IDs of corresponding sentences.
    self.seg2pID = {}

    # Created empty before load_pos_rules() is called.
    self.posRules = {}
    posRulesPath = os.path.join(self.corpusSettings['corpus_dir'],
                                'conf/posRules.txt')
    self.load_pos_rules(posRulesPath)
def __init__(self, settingsDir='conf_conversion'):
    """
    Prepare the state used while processing 'xml' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    The rule containers are created empty before self.load_rules()
    is called. Two optional entries of self.corpusSettings (assumed
    to be populated by the Txt2JSON initializer) override defaults:
    'main_gloss_language' (default 'en') and
    'bad_analysis_languages' (default: empty list).
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.srcExt = 'xml'
    # ID of the last aligned segment.
    self.pID = 0

    # Empty containers that exist before load_rules() runs.
    self.posRules = {}
    self.grammRules = []
    self.glosses = []
    self.load_rules()

    # All POS tags encountered in the XML.
    self.POSTags = set()
    # Initial pattern: matches only an empty string.
    self.rxStemGlosses = re.compile('^$')

    # Use the configured values when present, the defaults otherwise.
    settings = self.corpusSettings
    self.mainGlossLang = (settings['main_gloss_language']
                          if 'main_gloss_language' in settings
                          else 'en')
    self.badAnalysisLangs = (settings['bad_analysis_languages']
                             if 'bad_analysis_languages' in settings
                             else [])
def __init__(self, settingsDir='conf_conversion'):
    """
    Prepare the state used while processing 'eaf' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    Speaker metadata is then loaded with self.load_speaker_meta()
    and a MediaCutter is built from self.corpusSettings (assumed to
    be populated by the Txt2JSON initializer). self.rxIgnoreTokens
    starts as None before self.set_ignore_tokens() is called.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.speakerMeta = self.load_speaker_meta()
    self.mc = MediaCutter(settings=self.corpusSettings)

    self.srcExt = 'eaf'
    # ID of the last aligned segment.
    self.pID = 0
    self.glosses = set()
    # Time labels.
    self.tlis = {}
    # Main tier ID -> participant ID.
    self.participants = {}

    # (aID, child tier type) -> [child aID].
    self.segmentChildren = {}
    # aID -> (contents, parent aID, tli1, tli2).
    self.segmentTree = {}

    # aID of a segment -> {span annotation tier ID -> contents}.
    self.alignedSpanAnnoTiers = {}
    # Span annotation tier type -> {tier ID -> [(tli1, tli2, contents)]}.
    self.spanAnnoTiers = {}

    # Segments (start_ms, end_ms) that should be beeped out,
    # one list per source file.
    self.privacySegments = {}
    # Names of additional word-level fields associated with some
    # analysis tiers.
    self.additionalWordFields = []

    self.rxIgnoreTokens = None
    self.set_ignore_tokens()

    # Filenames of media fragments referenced in the JSONs.
    self.usedMediaFiles = set()
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while processing 'csv' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    # ID of the last aligned segment.
    self.pID = 0
    self.srcExt = 'csv'
def __init__(self, settingsDir='conf'):
    """
    Prepare the state used while processing 'json' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    self.glosses = set()
    self.srcExt = 'json'
def __init__(self, settingsDir='conf_conversion'):
    """
    Prepare the state used while processing 'xml' source files.

    settingsDir is passed through unchanged to Txt2JSON.__init__.
    """
    Txt2JSON.__init__(self, settingsDir=settingsDir)
    # ID of the last aligned segment.
    self.pID = 0
    self.srcExt = 'xml'