def main(self, files, outDir):
    """Generate decoder inputs for `files` in a scratch directory and decode into `outDir`.

    Three ``"<dataset>:<map>"`` specs are built from the ``S1``/``S2``/``S3``
    entries of ``self.settings``.  The input generator is run twice on
    ``<outDir>.tmp``: once with the first spec (producing the observation
    and output lists) and once with the remaining two specs and the
    ``.2`` extra extension.  After decoding, the scratch directory is
    removed unless ``self.debugMain`` is set.
    """
    data_sets = [
        '%s:%s' % (self.settings[name + '_DATASET'], self.settings[name + '_MAP'])
        for name in ('S1', 'S2', 'S3')
    ]

    tmpDir = os.path.normpath(outDir) + '.tmp'
    mkdirp(tmpDir)
    mkdirp(outDir)

    # Each spec has the form "<list file>:<per-item file pattern>"; the
    # doubled %% leaves a literal %(id)s placeholder in the pattern.
    unsList = '%s/obs.list:%s/%%(id)s.obs' % (tmpDir, tmpDir)
    unsList2 = '%s/obs.2.list:%s/%%(id)s.2.obs' % (tmpDir, tmpDir)
    outList = '%s/out.list:%s/%%(id)s.dcd' % (tmpDir, tmpDir)

    # Keyword arguments shared by both generator passes.
    shared = dict(
        parseType=self.settings['PARSE_TYPE'],
        origDataSets=self.settings['ORIG_DATASETS'],
        txtInput=True,
        pdtDir=self.settings['PDT20_TOOLS'],
    )

    generator = genInputs.InputGenerator()
    generator.main(files, tmpDir, unsList=[unsList, outList],
                   dataSet=data_sets[:1], **shared)
    generator.main(files, tmpDir, unsList=[unsList2],
                   dataSet=data_sets[1:], extraExt='.2', **shared)

    self.setCommonParams()
    self.decode(tmpDir, outDir)

    if self.debugMain:
        # Keep the scratch directory around for inspection.
        return
    shutil.rmtree(tmpDir, True)  # True == ignore_errors
def writeDAs(self, separ, dataLm):
    """Write every entry of `separ` to its own UTF-8 text file under `dataLm`.

    `separ` maps a key to an iterable of string sequences.  For each key
    the target path is ``self.mapTXT(dataLm, key)``; each sequence is
    written as one line with its items joined by single spaces.
    """
    mkdirp(dataLm)
    for da, da_words in separ.iteritems():
        target = self.mapTXT(dataLm, da)
        # The context manager closes the file even if a write fails.
        with codecs.open(target, 'w', 'utf-8') as out:
            for tokens in da_words:
                out.write("%s\n" % (' '.join(tokens),))
def writeDAs(self, separ, dataLm):
    """Dump the contents of `separ` into per-key UTF-8 text files in `dataLm`.

    The directory `dataLm` is created first.  Each key of `separ` is
    turned into a file name by ``self.mapTXT(dataLm, key)``; the
    associated value is iterated and every element is written as one
    space-joined line.
    """
    mkdirp(dataLm)
    for key, rows in separ.iteritems():
        path = self.mapTXT(dataLm, key)
        writer = codecs.open(path, 'w', 'utf-8')
        try:
            for row in rows:
                line = ' '.join(row)
                writer.write("%s\n" % (line,))
        finally:
            # Always release the handle, even when a write raises.
            writer.close()
def main(self, files, outDir):
    """Prepare generator inputs in ``<outDir>.tmp`` and decode them into `outDir`.

    Dataset specs of the form ``"<dataset>:<map>"`` are read from the
    ``S1``/``S2``/``S3`` settings.  Two generator passes are run: the
    first handles the first spec and also emits the output list, the
    second handles the other two specs with ``extraExt=".2"``.  The
    temporary directory is deleted afterwards unless ``self.debugMain``
    is truthy.
    """
    data_sets = []
    for prefix in ("S1", "S2", "S3"):
        dataset = self.settings[prefix + "_DATASET"]
        sym_map = self.settings[prefix + "_MAP"]
        data_sets.append("%s:%s" % (dataset, sym_map))

    tmpDir = os.path.normpath(outDir) + ".tmp"
    mkdirp(tmpDir)
    mkdirp(outDir)

    # "<list file>:<per-item pattern>"; %% keeps %(id)s literal in the pattern.
    unsList = "%s/obs.list:%s/%%(id)s.obs" % (tmpDir, tmpDir)
    unsList2 = "%s/obs.2.list:%s/%%(id)s.2.obs" % (tmpDir, tmpDir)
    outList = "%s/out.list:%s/%%(id)s.dcd" % (tmpDir, tmpDir)

    parseType = self.settings["PARSE_TYPE"]
    origDataSets = self.settings["ORIG_DATASETS"]
    pdtDir = self.settings["PDT20_TOOLS"]

    generator = genInputs.InputGenerator()

    # (list specs, dataset specs, pass-specific keyword arguments)
    passes = (
        ([unsList, outList], data_sets[:1], {}),
        ([unsList2], data_sets[1:], {"extraExt": ".2"}),
    )
    for list_specs, specs, extra in passes:
        generator.main(
            files,
            tmpDir,
            unsList=list_specs,
            dataSet=specs,
            parseType=parseType,
            origDataSets=origDataSets,
            txtInput=True,
            pdtDir=pdtDir,
            **extra
        )

    self.setCommonParams()
    self.decode(tmpDir, outDir)

    keep_tmp = self.debugMain
    if not keep_tmp:
        shutil.rmtree(tmpDir, True)  # second argument: ignore_errors
def makeDirs(self):
    """Log the step and create ``self.workDir`` and ``self.outDir`` via ``mkdirp``."""
    self.logger.info("Making build directories")
    # Attributes are looked up one at a time, so the work directory is
    # created before the output directory attribute is even read.
    for attr_name in ('workDir', 'outDir'):
        mkdirp(getattr(self, attr_name))
def fsmconvert(self, pteMapFn=None):
    """Generate the FSM description files in ``FSM_DIR`` and compile them.

    Steps, in order:

    1. read the numeric cut-offs and state limit from ``self.settings``;
    2. load the concept map (dropping its ``_SINK_`` entry) and one symbol
       map per dataset ``S1``..``S3`` (``None`` for a dataset set to
       ``'off'``), recording each dataset value in ``FSM_DIR/datasets``;
    3. optionally load the PTE map from `pteMapFn`;
    4. read the master file ``MSTR_DCD_DIR/in.mstr`` into a GMTK workspace;
    5. build an ``fsm.FSMGenerator`` and write its FSM text files and
       symbol maps into ``FSM_DIR``;
    6. call ``self.fsmcompile()``.

    :param pteMapFn: path of a PTE map file, or ``None`` to use no PTE
        symbols.
    :raises KeyError: if a required setting is missing, or (presumably,
        depending on ``SymMap``) if the concept map has no ``_SINK_`` entry.
    """
    # Presumably 'src' is where the ``fsm`` module lives; only add it once
    # so repeated calls do not keep growing sys.path.
    if 'src' not in sys.path:
        sys.path.append('src')
    import fsm
    from svc.ui import gmtk

    max_states = int(self.settings['FSM_STATES'])
    cutoff_sym = float(self.settings['FSM_CUTOFF_SYM'])
    cutoff_trans = float(self.settings['FSM_CUTOFF_TRANS'])

    self.setCommonParams()

    FSM_DIR = self.settings['FSM_DIR']
    mkdirp(FSM_DIR)

    conceptMapFn = self.settings['CONCEPT_MAP']
    self.logger.debug("Reading concept map: %s", conceptMapFn)
    conceptMap = SymMap.readFromFile(conceptMapFn, format=(int, unicode)).inverse
    del conceptMap['_SINK_']
    # Debugging aids: restrict the concept map to a handful of concepts.
    #conceptMap = SymMap((k, v) for (k, v) in conceptMap.iteritems() if k in '_EMPTY_ GREETING DEPARTURE'.split())
    #conceptMap = SymMap((k, v) for (k, v) in conceptMap.iteritems() if k in '_EMPTY_ GREETING'.split())

    dataset_fn = os.path.join(FSM_DIR, 'datasets')
    sMaps = []
    dataset_fw = open(dataset_fn, 'w')
    try:
        # The handle is closed in ``finally`` so that a failing settings
        # lookup or map read does not leak it.
        for ds in [1, 2, 3]:
            ds_value = self.settings['S%d_DATASET' % ds]
            if ds_value != 'off':
                mapFn = self.settings['S%d_MAP' % ds]
                self.logger.debug("Reading s%d map: %s", ds, mapFn)
                symMap = SymMap.readFromFile(mapFn, format=(int, unicode)).inverse
                # Debugging aid: restrict the symbol map to a few words.
                #symMap = SymMap((k, v) for (k, v) in symMap.iteritems() if k in u'dobrý den kdy jede _empty_ _unseen_'.split())
                sMaps.append(symMap)
            else:
                self.logger.debug("Dataset s%d is turned off", ds)
                # Keep a placeholder so sMaps stays aligned with S1..S3.
                sMaps.append(None)
            # One line per dataset, including those that are 'off'.
            dataset_fw.write(ds_value + '\n')
    finally:
        dataset_fw.close()

    if pteMapFn is not None:
        self.logger.debug("Reading pte map: %s", pteMapFn)
        pteMap = SymMap.readFromFile(pteMapFn, format=(unicode, int))
    else:
        pteMap = {}
    pteSymbols = pteMap.keys()

    mstr = os.path.join(self.settings['MSTR_DCD_DIR'], 'in.mstr')
    cppOptions = self.settings['CPP_OPTIONS'].split()
    workspace = gmtk.Workspace(cppOptions=cppOptions, readDTS=False)
    self.logger.info('Reading master file: %s', mstr)
    workspace.readMasterFile(mstr)

    self.logger.info('Creating FSM from arcs')
    self.logger.info('Total number of concepts: %d', len(conceptMap))
    #self.logger.info('Total number of symbols: %d', len(s1Map))
    stateGenerator = fsm.FSMGenerator(workspace, conceptMap, sMaps,
                                      cutoff_sym, cutoff_trans, max_states,
                                      pteSymbols=pteSymbols,
                                      logger=self.logger)

    stateGenerator.writeFSMRepeater(os.path.join(FSM_DIR, 'hvsrepeater.txt'))
    stateGenerator.writeFSMPadder(os.path.join(FSM_DIR, 'hvspadder.txt'))
    stateGenerator.writeFSM(os.path.join(FSM_DIR, 'hvsparser_pad.txt'))

    stateGenerator.stateMap.writeToFile(os.path.join(FSM_DIR, 'state.map'))
    stateGenerator.osymMap.writeToFile(os.path.join(FSM_DIR, 'osym.map'))
    # Input symbol maps are numbered from 1: isym1.map, isym2.map, ...
    for i, isymMap in enumerate(stateGenerator.isymMaps):
        isymMap.writeToFile(os.path.join(FSM_DIR, 'isym%d.map' % (i + 1, )))
    stateGenerator.ipteMap.writeToFile(os.path.join(FSM_DIR, 'pte.map'))

    self.fsmcompile()
def fsmconvert(self, pteMapFn=None):
    """Generate the FSM description files in ``FSM_DIR`` and compile them.

    Steps, in order:

    1. read the numeric cut-offs and state limit from ``self.settings``;
    2. load the concept map (dropping its ``_SINK_`` entry) and one symbol
       map per dataset ``S1``..``S3`` (``None`` for a dataset set to
       ``'off'``), recording each dataset value in ``FSM_DIR/datasets``;
    3. optionally load the PTE map from `pteMapFn`;
    4. read the master file ``MSTR_DCD_DIR/in.mstr`` into a GMTK workspace;
    5. build an ``fsm.FSMGenerator`` and write its FSM text files and
       symbol maps into ``FSM_DIR``;
    6. call ``self.fsmcompile()``.

    :param pteMapFn: path of a PTE map file, or ``None`` to use no PTE
        symbols.
    :raises KeyError: if a required setting is missing, or (presumably,
        depending on ``SymMap``) if the concept map has no ``_SINK_`` entry.
    """
    # Presumably 'src' is where the ``fsm`` module lives; only add it once
    # so repeated calls do not keep growing sys.path.
    if 'src' not in sys.path:
        sys.path.append('src')
    import fsm
    from svc.ui import gmtk

    max_states = int(self.settings['FSM_STATES'])
    cutoff_sym = float(self.settings['FSM_CUTOFF_SYM'])
    cutoff_trans = float(self.settings['FSM_CUTOFF_TRANS'])

    self.setCommonParams()

    FSM_DIR = self.settings['FSM_DIR']
    mkdirp(FSM_DIR)

    conceptMapFn = self.settings['CONCEPT_MAP']
    self.logger.debug("Reading concept map: %s", conceptMapFn)
    conceptMap = SymMap.readFromFile(conceptMapFn, format=(int, unicode)).inverse
    del conceptMap['_SINK_']
    # Debugging aids: restrict the concept map to a handful of concepts.
    #conceptMap = SymMap((k, v) for (k, v) in conceptMap.iteritems() if k in '_EMPTY_ GREETING DEPARTURE'.split())
    #conceptMap = SymMap((k, v) for (k, v) in conceptMap.iteritems() if k in '_EMPTY_ GREETING'.split())

    dataset_fn = os.path.join(FSM_DIR, 'datasets')
    sMaps = []
    dataset_fw = open(dataset_fn, 'w')
    try:
        # The handle is closed in ``finally`` so that a failing settings
        # lookup or map read does not leak it.
        for ds in [1, 2, 3]:
            ds_value = self.settings['S%d_DATASET' % ds]
            if ds_value != 'off':
                mapFn = self.settings['S%d_MAP' % ds]
                self.logger.debug("Reading s%d map: %s", ds, mapFn)
                symMap = SymMap.readFromFile(mapFn, format=(int, unicode)).inverse
                # Debugging aid: restrict the symbol map to a few words.
                #symMap = SymMap((k, v) for (k, v) in symMap.iteritems() if k in u'dobrý den kdy jede _empty_ _unseen_'.split())
                sMaps.append(symMap)
            else:
                self.logger.debug("Dataset s%d is turned off", ds)
                # Keep a placeholder so sMaps stays aligned with S1..S3.
                sMaps.append(None)
            # One line per dataset, including those that are 'off'.
            dataset_fw.write(ds_value + '\n')
    finally:
        dataset_fw.close()

    if pteMapFn is not None:
        self.logger.debug("Reading pte map: %s", pteMapFn)
        pteMap = SymMap.readFromFile(pteMapFn, format=(unicode, int))
    else:
        pteMap = {}
    pteSymbols = pteMap.keys()

    mstr = os.path.join(self.settings['MSTR_DCD_DIR'], 'in.mstr')
    cppOptions = self.settings['CPP_OPTIONS'].split()
    workspace = gmtk.Workspace(cppOptions=cppOptions, readDTS=False)
    self.logger.info('Reading master file: %s', mstr)
    workspace.readMasterFile(mstr)

    self.logger.info('Creating FSM from arcs')
    self.logger.info('Total number of concepts: %d', len(conceptMap))
    #self.logger.info('Total number of symbols: %d', len(s1Map))
    stateGenerator = fsm.FSMGenerator(workspace, conceptMap, sMaps,
                                      cutoff_sym, cutoff_trans, max_states,
                                      pteSymbols=pteSymbols,
                                      logger=self.logger)

    stateGenerator.writeFSMRepeater(os.path.join(FSM_DIR, 'hvsrepeater.txt'))
    stateGenerator.writeFSMPadder(os.path.join(FSM_DIR, 'hvspadder.txt'))
    stateGenerator.writeFSM(os.path.join(FSM_DIR, 'hvsparser_pad.txt'))

    stateGenerator.stateMap.writeToFile(os.path.join(FSM_DIR, 'state.map'))
    stateGenerator.osymMap.writeToFile(os.path.join(FSM_DIR, 'osym.map'))
    # Input symbol maps are numbered from 1: isym1.map, isym2.map, ...
    for i, isymMap in enumerate(stateGenerator.isymMaps):
        isymMap.writeToFile(os.path.join(FSM_DIR, 'isym%d.map' % (i + 1, )))
    stateGenerator.ipteMap.writeToFile(os.path.join(FSM_DIR, 'pte.map'))

    self.fsmcompile()
ret.append(base) if path == chop_dir: break if path == '/': break rstr = ret.pop() while ret: rstr = os.path.join(rstr, ret.pop()) return rstr to_dir = sys.argv[1] if len(sys.argv) == 3: chop_dir = sys.argv[2] else: chop_dir = '/' for fn in sys.stdin: try: fn = fn.rstrip() if not fn: continue if os.path.isdir(fn): continue new_fn = os.path.join(to_dir, cut_dir(fn, chop_dir)) mkdirp(os.path.split(new_fn)[0]) shutil.copy(fn, new_fn) except: print sys.exc_info()[1]