Example #1
0
    def main(self, files, outDir):
        """Generate decoder inputs in a scratch directory and run the decoder.

        The scratch directory is ``outDir`` (normalized) with ``.tmp``
        appended.  ``genInputs.InputGenerator`` is run twice over ``files``:
        first for the S1 dataset (observation and output lists), then for the
        S2 and S3 datasets with the extra extension ``.2``.  Afterwards
        ``self.decode`` is called with the scratch and output directories, and
        the scratch directory is removed unless ``self.debugMain`` is set.

        :param files: passed unchanged to ``InputGenerator.main``.
        :param outDir: directory handed to ``self.decode`` as its output.
        """
        # One "<dataset>:<map>" specification per S1/S2/S3 settings pair.
        stream_specs = [
            '%s:%s' % (self.settings[tag+'_DATASET'], self.settings[tag+'_MAP'])
            for tag in ('S1', 'S2', 'S3')
        ]

        tmpDir = os.path.normpath(outDir)+'.tmp'
        for directory in (tmpDir, outDir):
            mkdirp(directory)

        # "<list file>:<per-id file pattern>" pairs inside the scratch dir.
        obs_spec = '%s/obs.list:%s/%%(id)s.obs' % (tmpDir, tmpDir)
        obs2_spec = '%s/obs.2.list:%s/%%(id)s.2.obs' % (tmpDir, tmpDir)
        out_spec = '%s/out.list:%s/%%(id)s.dcd' % (tmpDir, tmpDir)

        # Keyword arguments shared by both generator runs.
        shared = {
            'parseType': self.settings['PARSE_TYPE'],
            'origDataSets': self.settings['ORIG_DATASETS'],
            'txtInput': True,
            'pdtDir': self.settings['PDT20_TOOLS'],
        }

        inputs = genInputs.InputGenerator()
        inputs.main(files, tmpDir, unsList=[obs_spec, out_spec],
                    dataSet=stream_specs[:1], **shared)
        inputs.main(files, tmpDir, unsList=[obs2_spec],
                    dataSet=stream_specs[1:], extraExt='.2', **shared)

        self.setCommonParams()
        self.decode(tmpDir, outDir)

        # Keep the scratch files around when debugging.
        if self.debugMain:
            return
        shutil.rmtree(tmpDir, True)
    def writeDAs(self, separ, dataLm):
        """Write each entry of ``separ`` to its own UTF-8 text file.

        :param separ: mapping whose values are iterables of token sequences;
            every sequence becomes one space-joined line.
        :param dataLm: directory that is created first and then passed,
            together with each key, to ``self.mapTXT`` to get the file name.
        """
        mkdirp(dataLm)

        for key, sequences in separ.iteritems():
            target = self.mapTXT(dataLm, key)
            sink = codecs.open(target, 'w', 'utf-8')
            try:
                for tokens in sequences:
                    line = ' '.join(tokens)
                    sink.write("%s\n" % line)
            finally:
                # Close the file even if joining or writing fails.
                sink.close()
    def writeDAs(self, separ, dataLm):
        """Store the contents of ``separ`` as one text file per key.

        The directory ``dataLm`` is created, then for every key the file name
        is obtained from ``self.mapTXT(dataLm, key)`` and each item of the
        key's value is written as a single line of space-separated elements,
        encoded as UTF-8.
        """
        mkdirp(dataLm)

        for label, rows in separ.iteritems():
            path = self.mapTXT(dataLm, label)
            out = codecs.open(path, 'w', 'utf-8')
            try:
                for row in rows:
                    text = ' '.join(row)
                    out.write("%s\n" % text)
            finally:
                # The handle is released whether or not writing succeeded.
                out.close()
    def main(self, files, outDir):
        """Build the decoder inputs for ``files`` and decode them into ``outDir``.

        Inputs are generated into ``<normalized outDir>.tmp`` by two runs of
        ``genInputs.InputGenerator``: one for the S1 dataset, one for the S2
        and S3 datasets (extra extension ``.2``).  ``self.decode`` then reads
        from that directory, which is deleted afterwards unless
        ``self.debugMain`` is true.

        :param files: forwarded as-is to ``InputGenerator.main``.
        :param outDir: output directory passed to ``self.decode``.
        """
        # "<dataset>:<map>" strings for S1, S2 and S3, in that order.
        specs = [
            "%s:%s" % (self.settings[prefix + "_DATASET"], self.settings[prefix + "_MAP"])
            for prefix in ("S1", "S2", "S3")
        ]

        tmpDir = os.path.normpath(outDir) + ".tmp"
        mkdirp(tmpDir)
        mkdirp(outDir)

        primary_obs = "%s/obs.list:%s/%%(id)s.obs" % (tmpDir, tmpDir)
        secondary_obs = "%s/obs.2.list:%s/%%(id)s.2.obs" % (tmpDir, tmpDir)
        decoded = "%s/out.list:%s/%%(id)s.dcd" % (tmpDir, tmpDir)

        parseType = self.settings["PARSE_TYPE"]
        origDataSets = self.settings["ORIG_DATASETS"]
        pdtDir = self.settings["PDT20_TOOLS"]

        generator = genInputs.InputGenerator()

        def generate(lists, data, **extra):
            # Both runs differ only in the lists, the datasets and extras.
            generator.main(files, tmpDir, unsList=lists, dataSet=data,
                           parseType=parseType, origDataSets=origDataSets,
                           txtInput=True, pdtDir=pdtDir, **extra)

        generate([primary_obs, decoded], specs[:1])
        generate([secondary_obs], specs[1:], extraExt=".2")

        self.setCommonParams()
        self.decode(tmpDir, outDir)

        # The temporary directory is only kept for debugging runs.
        if not self.debugMain:
            shutil.rmtree(tmpDir, True)
 def makeDirs(self):
     """Log the step, then create ``self.workDir`` and ``self.outDir``."""
     self.logger.info("Making build directories")
     # Attributes are read one at a time, in the same order as before.
     for attr in ('workDir', 'outDir'):
         mkdirp(getattr(self, attr))
    def fsmconvert(self, pteMapFn=None):
        """Write the FSM description files into ``FSM_DIR`` and compile them.

        Reads the concept map and the S1-S3 symbol maps named in
        ``self.settings``, loads the master file ``in.mstr`` from
        ``MSTR_DCD_DIR`` into a ``gmtk.Workspace``, lets ``fsm.FSMGenerator``
        write the repeater, padder and parser FSMs plus the state and symbol
        maps, and finally calls ``self.fsmcompile()``.

        :param pteMapFn: optional file name of a symbol map read with
            ``format=(unicode, int)``; its keys are handed to the generator as
            ``pteSymbols``.  With ``None`` an empty key list is used.
        """
        # NOTE(review): 'src' is presumably where the ``fsm`` module lives.
        # Add it only once so repeated calls do not keep growing sys.path.
        if 'src' not in sys.path:
            sys.path.append('src')
        import fsm
        from svc.ui import gmtk

        max_states = int(self.settings['FSM_STATES'])
        cutoff_sym = float(self.settings['FSM_CUTOFF_SYM'])
        cutoff_trans = float(self.settings['FSM_CUTOFF_TRANS'])

        self.setCommonParams()
        FSM_DIR = self.settings['FSM_DIR']
        mkdirp(FSM_DIR)

        conceptMapFn = self.settings['CONCEPT_MAP']
        self.logger.debug("Reading concept map: %s", conceptMapFn)
        conceptMap = SymMap.readFromFile(conceptMapFn,
                                         format=(int, unicode)).inverse
        # The '_SINK_' entry is dropped before the map reaches the generator.
        del conceptMap['_SINK_']

        # FSM_DIR/datasets gets one line per dataset slot.  sMaps keeps the
        # same slot order and holds None for a dataset that is 'off'.
        dataset_fn = os.path.join(FSM_DIR, 'datasets')
        dataset_fw = open(dataset_fn, 'w')
        sMaps = []
        try:
            for ds in [1, 2, 3]:
                ds_value = self.settings['S%d_DATASET' % ds]
                if ds_value != 'off':
                    mapFn = self.settings['S%d_MAP' % ds]
                    self.logger.debug("Reading s%d map: %s", ds, mapFn)
                    symMap = SymMap.readFromFile(
                        mapFn, format=(int, unicode)).inverse
                    sMaps.append(symMap)
                else:
                    self.logger.debug("Dataset s%d is turned off", ds)
                    sMaps.append(None)
                dataset_fw.write(ds_value + '\n')
        finally:
            # Close the handle even when a setting is missing or a map
            # cannot be read.
            dataset_fw.close()

        if pteMapFn is not None:
            self.logger.debug("Reading pte map: %s", pteMapFn)
            pteMap = SymMap.readFromFile(pteMapFn, format=(unicode, int))
        else:
            pteMap = {}
        pteSymbols = pteMap.keys()

        mstr = os.path.join(self.settings['MSTR_DCD_DIR'], 'in.mstr')
        cppOptions = self.settings['CPP_OPTIONS'].split()
        workspace = gmtk.Workspace(cppOptions=cppOptions, readDTS=False)
        self.logger.info('Reading master file: %s', mstr)
        workspace.readMasterFile(mstr)

        self.logger.info('Creating FSM from arcs')

        self.logger.info('Total number of concepts: %d', len(conceptMap))

        stateGenerator = fsm.FSMGenerator(workspace,
                                          conceptMap,
                                          sMaps,
                                          cutoff_sym,
                                          cutoff_trans,
                                          max_states,
                                          pteSymbols=pteSymbols,
                                          logger=self.logger)
        stateGenerator.writeFSMRepeater(
            os.path.join(FSM_DIR, 'hvsrepeater.txt'))
        stateGenerator.writeFSMPadder(os.path.join(FSM_DIR, 'hvspadder.txt'))
        stateGenerator.writeFSM(os.path.join(FSM_DIR, 'hvsparser_pad.txt'))

        stateGenerator.stateMap.writeToFile(os.path.join(FSM_DIR, 'state.map'))
        stateGenerator.osymMap.writeToFile(os.path.join(FSM_DIR, 'osym.map'))
        # Input symbol maps are numbered from 1: isym1.map, isym2.map, ...
        for i, isymMap in enumerate(stateGenerator.isymMaps):
            isymMap.writeToFile(
                os.path.join(FSM_DIR, 'isym%d.map' % (i + 1, )))
        stateGenerator.ipteMap.writeToFile(os.path.join(FSM_DIR, 'pte.map'))

        self.fsmcompile()
 def makeDirs(self):
     """Create the work and output directories after logging the step."""
     self.logger.info("Making build directories")
     # ``workDir`` is created before ``outDir``, as in the original order.
     for attr in ('workDir', 'outDir'):
         mkdirp(getattr(self, attr))
    def fsmconvert(self, pteMapFn=None):
        """Write the FSM description files into ``FSM_DIR`` and compile them.

        Reads the concept map and the S1-S3 symbol maps named in
        ``self.settings``, loads the master file ``in.mstr`` from
        ``MSTR_DCD_DIR`` into a ``gmtk.Workspace``, lets ``fsm.FSMGenerator``
        write the repeater, padder and parser FSMs plus the state and symbol
        maps, and finally calls ``self.fsmcompile()``.

        :param pteMapFn: optional file name of a symbol map read with
            ``format=(unicode, int)``; its keys are handed to the generator as
            ``pteSymbols``.  With ``None`` an empty key list is used.
        """
        # NOTE(review): 'src' is presumably where the ``fsm`` module lives.
        # Add it only once so repeated calls do not keep growing sys.path.
        if 'src' not in sys.path:
            sys.path.append('src')
        import fsm
        from svc.ui import gmtk

        max_states = int(self.settings['FSM_STATES'])
        cutoff_sym = float(self.settings['FSM_CUTOFF_SYM'])
        cutoff_trans = float(self.settings['FSM_CUTOFF_TRANS'])

        self.setCommonParams()
        FSM_DIR = self.settings['FSM_DIR']
        mkdirp(FSM_DIR)

        conceptMapFn = self.settings['CONCEPT_MAP']
        self.logger.debug("Reading concept map: %s", conceptMapFn)
        conceptMap = SymMap.readFromFile(conceptMapFn, format=(int, unicode)).inverse
        # The '_SINK_' entry is dropped before the map reaches the generator.
        del conceptMap['_SINK_']

        # FSM_DIR/datasets gets one line per dataset slot.  sMaps keeps the
        # same slot order and holds None for a dataset that is 'off'.
        dataset_fn = os.path.join(FSM_DIR, 'datasets')
        dataset_fw = open(dataset_fn, 'w')
        sMaps = []
        try:
            for ds in [1, 2, 3]:
                ds_value = self.settings['S%d_DATASET' % ds]
                if ds_value != 'off':
                    mapFn = self.settings['S%d_MAP' % ds]
                    self.logger.debug("Reading s%d map: %s", ds, mapFn)
                    symMap = SymMap.readFromFile(mapFn, format=(int, unicode)).inverse
                    sMaps.append(symMap)
                else:
                    self.logger.debug("Dataset s%d is turned off", ds)
                    sMaps.append(None)
                dataset_fw.write(ds_value + '\n')
        finally:
            # Close the handle even when a setting is missing or a map
            # cannot be read.
            dataset_fw.close()

        if pteMapFn is not None:
            self.logger.debug("Reading pte map: %s", pteMapFn)
            pteMap = SymMap.readFromFile(pteMapFn, format=(unicode, int))
        else:
            pteMap = {}
        pteSymbols = pteMap.keys()

        mstr = os.path.join(self.settings['MSTR_DCD_DIR'], 'in.mstr')
        cppOptions = self.settings['CPP_OPTIONS'].split()
        workspace = gmtk.Workspace(cppOptions=cppOptions, readDTS=False)
        self.logger.info('Reading master file: %s', mstr)
        workspace.readMasterFile(mstr)

        self.logger.info('Creating FSM from arcs')

        self.logger.info('Total number of concepts: %d', len(conceptMap))

        stateGenerator = fsm.FSMGenerator(workspace, conceptMap, sMaps,
                                    cutoff_sym, cutoff_trans, max_states,
                                    pteSymbols=pteSymbols,
                                    logger=self.logger)
        stateGenerator.writeFSMRepeater(os.path.join(FSM_DIR, 'hvsrepeater.txt'))
        stateGenerator.writeFSMPadder(os.path.join(FSM_DIR, 'hvspadder.txt'))
        stateGenerator.writeFSM(os.path.join(FSM_DIR, 'hvsparser_pad.txt'))

        stateGenerator.stateMap.writeToFile(os.path.join(FSM_DIR, 'state.map'))
        stateGenerator.osymMap.writeToFile(os.path.join(FSM_DIR, 'osym.map'))
        # Input symbol maps are numbered from 1: isym1.map, isym2.map, ...
        for i, isymMap in enumerate(stateGenerator.isymMaps):
            isymMap.writeToFile(os.path.join(FSM_DIR, 'isym%d.map' % (i+1, )))
        stateGenerator.ipteMap.writeToFile(os.path.join(FSM_DIR, 'pte.map'))

        self.fsmcompile()
Example #9
0
        ret.append(base)
        if path == chop_dir:
            break
        if path == '/':
            break
    rstr = ret.pop()
    while ret:
        rstr = os.path.join(rstr, ret.pop())
    return rstr

# Usage: <script> TO_DIR [CHOP_DIR]
# File names are read from standard input, one per line, and each file is
# copied under TO_DIR at the relative path returned by cut_dir(fn, chop_dir).
if len(sys.argv) < 2:
    # Fail with a readable message instead of a raw IndexError traceback.
    sys.exit("usage: %s TO_DIR [CHOP_DIR] < file_list" % sys.argv[0])

to_dir = sys.argv[1]
if len(sys.argv) == 3:
    chop_dir = sys.argv[2]
else:
    chop_dir = '/'

for fn in sys.stdin:
    try:
        fn = fn.rstrip()
        # Blank lines and directories are skipped; only files are copied.
        if not fn:
            continue
        if os.path.isdir(fn):
            continue
        new_fn = os.path.join(to_dir, cut_dir(fn, chop_dir))
        mkdirp(os.path.split(new_fn)[0])
        shutil.copy(fn, new_fn)
    except Exception:
        # Best effort: report the failure and continue with the next file.
        # Catching Exception rather than using a bare ``except`` lets
        # KeyboardInterrupt and SystemExit stop the loop.
        print(sys.exc_info()[1])