def genCorrContentFrame(filename, Corpus=Corpus, TreePath='CorrFullTrees/'):
    if '-' in filename: instructionID = filename.split('-')[1]
    else: instructionID = filename
    print '\n',instructionID
    if not Corpus:
        Directors = ['EDA','EMWC','KLS','KXP','TJS','WLH']
        Maps = ['Jelly','L','Grid']
        Corpus = DirectionCorpusReader(constructItemRegexp(Directors,Maps))
    Trees=[tree['TREE'] for tree in Corpus.read(TreePath+'/FullTree-'+instructionID)]
    Frames = trees2frames(Trees)
    saveParse(Trees,instructionID,directory='Directions/'+TreePath)
    saveFrame(Frames,instructionID)
    for frame in Frames: print `frame`
Example #2
0
def genCorrContentFrame(filename, Corpus=Corpus, TreePath="CorrFullTrees/"):
    if "-" in filename:
        instructionID = filename.split("-")[1]
    else:
        instructionID = filename
    print "\n", instructionID
    if not Corpus:
        Directors = ["EDA", "EMWC", "KLS", "KXP", "TJS", "WLH"]
        Maps = ["Jelly", "L", "Grid"]
        Corpus = DirectionCorpusReader(constructItemRegexp(Directors, Maps))
    Trees = [tree["TREE"] for tree in Corpus.read(TreePath + "/FullTree-" + instructionID)]
    Frames = trees2frames(Trees)
    saveParse(Trees, instructionID, directory="Directions/" + TreePath)
    saveFrame(Frames, instructionID)
    for frame in Frames:
        print ` frame `
Example #3
0
def trainPCFG(Directors,
              Maps,
              PcfgFile=PcfgFileName,
              SenseTaggerFile=SenseTaggerFileName,
              cv=0.1,
              Starts=[],
              mapversions='[01]',
              Lexicon=''):
    corpus_regexp = constructItemRegexp(Directors,
                                        Maps,
                                        starts=Starts,
                                        mapversions=mapversions)
    if Lexicon: corpus_regexp = constructSetOrRegexp([corpus_regexp, Lexicon])
    Directions = DirectionCorpusReader(corpus_regexp)
    Pcfg, SenseTagger, TestSet = cvTrainPCFG(Directions,
                                             saveParses=0,
                                             StartSymbol='S',
                                             Group='CorrFullDirTrees',
                                             cv=cv,
                                             parseTest=doParses)
    if __debug__: print Pcfg
    if PcfgFile:
        cPickle.dump(Pcfg, open(PcfgFile, 'w'))
        cPickle.dump(SenseTagger, open(SenseTaggerFile, 'w'))
    return Pcfg, SenseTagger, TestSet
Example #4
0
def parse3From12():
    """Parse the Directors3 'CleanDirs' items with a Directors1+2 parser.

    Builds a direction parser from Directors1+Directors2 and the
    'Corpus1+Corpus2-12-Corrected.pcfg' grammar file, then hands it,
    together with a corpus reader over Directors3, to parseTestSet.
    """
    # Local name: this value, not any module-level PcfgFileName, is what
    # getDirParser receives below.
    PcfgFileName = 'Corpus1+Corpus2-12-Corrected.pcfg'
    Directors= Directors1+Directors2

    # NOTE(review): system_corpora is assigned inside this function, which
    # makes it a local name here.  Reading it in the try line therefore
    # looks like it always fails before set_basedir is called, so the bare
    # except branch would run every time -- confirm this is intended.
    try: nltk.corpus.set_basedir(system_corpora)
    except: system_corpora=nltk.corpus.get_basedir()
    
    logger.initLogger('ParseDirections',LogDir='MarcoLogs')
    # Imported at call time rather than at module level.
    import enchant
    from Sense import Lexicon
    # Spell checker handed to the parser; presumably Lexicon is the
    # personal word list for the 'en_US' dictionary -- TODO confirm.
    spellchecker = enchant.DictWithPWL('en_US', Lexicon)
    # Maps, usePOSTagger, POSTaggerFileName, SenseTaggerFileName and
    # collectStats are not defined in this function; they come from the
    # enclosing module scope.
    DirParser = getDirParser(Directors, Maps, usePOSTagger, POSTaggerFileName,
                             PcfgFileName, SenseTaggerFileName, collectStats,
                             spellchecker=spellchecker)
    # The items to parse come from Directors3, not the training directors.
    Directions = DirectionCorpusReader(constructItemRegexp(Directors3,Maps,mapversions='[01]'))
    parseTestSet(DirParser, Directions, list(Directions.items('CleanDirs')), 1)
def genUncorrContentFrames(Directors):
    """Generate content frames for the files under 'Directions/FullTrees'.

    Directors -- forwarded to constructItemRegexp to build a corpus
        reader (see the review note below).

    Each filename produced by lstail for the 'FullTree-*.txt' pattern is
    passed to genCorrContentFrame with TreePath='FullTrees/'.  A
    ValueError raised for one file is ignored so the remaining files are
    still processed.
    """
    import re
    # NOTE(review): this reader is built but never passed on to
    # genCorrContentFrame, which falls back to its own default corpus.
    # The construction is kept because it may have side effects; confirm
    # whether Corpus=Corpus was meant to be passed below.
    Corpus = DirectionCorpusReader(constructItemRegexp(Directors, mapversions='[01]'))
    # The dot before 'txt' is escaped so only a literal '.txt' suffix
    # matches; unescaped it matched any character in that position.
    treeFilePattern = re.compile(r'^FullTree-.*\.txt$')
    for filename in lstail('Directions/FullTrees', treeFilePattern):
        try:
            genCorrContentFrame(filename, TreePath='FullTrees/')
        except ValueError:
            # Deliberate best effort: skip this file and carry on.
            pass
Example #6
0
def parseInstruction(instructID):
    """Parse a single instruction, saving its parses and frames.

    Builds a DirectionCorpusReader and a direction parser (statistics
    collection switched off) from the module-level Directors and Maps,
    then returns whatever the reader's parseInstruction gives back for
    instructID with saveParses=True and frames=True.
    """
    # The reader is created before the parser, as in the one-expression form.
    reader = DirectionCorpusReader(constructItemRegexp(Directors, Maps))
    dirParser = getDirParser(Directors, Maps, collectStats=False)
    return reader.parseInstruction(dirParser,
                                   instructID,
                                   saveParses=True,
                                   frames=True)
            logger.error("%s.",CaughtErrorTxt)
            if str(e).startswith("Error parsing field structure"):
                CaughtError = 'EOFError'
            else:
                CaughtError = 'ValueError'
    return frames,CaughtError,CaughtErrorTxt

def getSSS(instructID):
    """Return the first element of readCorrFrame's result for instructID.

    '.txt' is appended to instructID unless it already ends with 'txt'.
    readCorrFrame is called with an empty list as its first argument.
    """
    if instructID.endswith('txt'):
        frameFile = instructID
    else:
        frameFile = instructID + '.txt'
    result = readCorrFrame([], frameFile)
    return result[0]

if __name__ != '__main__':
    # Imported as a module: no corpus reader is built up front.
    Corpus = None
else:
    # Run as a script: set up logging and read the default corpus selection.
    logger.initLogger('Sense', LogDir='MarcoLogs')
    Directors = ['EDA', 'EMWC', 'KLS', 'KXP', 'TJS', 'WLH']
    Maps = ['Jelly', 'L', 'Grid']
    Corpus = DirectionCorpusReader(constructItemRegexp(Directors, Maps))

def genCorrContentFrame(filename, Corpus=Corpus, TreePath='CorrFullTrees/'):
    if '-' in filename: instructionID = filename.split('-')[1]
    else: instructionID = filename
    print '\n',instructionID
    if not Corpus:
        Directors = ['EDA','EMWC','KLS','KXP','TJS','WLH']
        Maps = ['Jelly','L','Grid']
        Corpus = DirectionCorpusReader(constructItemRegexp(Directors,Maps))
    Trees=[tree['TREE'] for tree in Corpus.read(TreePath+'/FullTree-'+instructionID)]
    Frames = trees2frames(Trees)
    saveParse(Trees,instructionID,directory='Directions/'+TreePath)
    saveFrame(Frames,instructionID)
    for frame in Frames: print `frame`
Example #8
0
    import enchant
    from Sense import Lexicon
    spellchecker = enchant.DictWithPWL('en_US', Lexicon)
    DirParser = getDirParser(Directors,
                             Maps,
                             usePOSTagger,
                             POSTaggerFileName,
                             PcfgFileName,
                             SenseTaggerFileName,
                             collectStats,
                             spellchecker=spellchecker)

    if doParses == 'Profile':
        import profile
        profile.run('testParses(DirParser)', 'parse.prof')
    elif doParses == 'TestSet':
        Directions = DirectionCorpusReader(constructItemRegexp(
            Directors, Maps))
        parseTestSet(DirParser, Directions, TestSet, 1)
    elif doParses == 'CommandLine':
        if len(sys.argv) > 1 and sys.argv[1]:
            Files = sys.argv[1]
        else:
            Files = constructItemRegexp(Directors, Maps, mapversions='[01]')
        print 'Parsing', Files
        Directions = DirectionCorpusReader(Files)
        TestSet = list(Directions.items('CleanDirs'))
        #random.shuffle(TestSet);
        #timeSort(TestSet)
        parseTestSet(DirParser, Directions, TestSet, 1)
    try: nltk.corpus.set_basedir(system_corpora)
    except: system_corpora=nltk.corpus.get_basedir()
    
    logger.initLogger('ParseDirections',LogDir='MarcoLogs')
    import enchant
    from Sense import Lexicon
    spellchecker = enchant.DictWithPWL('en_US', Lexicon)

    DirParser = getDirParser(Directors, Maps, usePOSTagger, POSTaggerFileName,
                             PcfgFileName, SenseTaggerFileName, collectStats,
                             spellchecker=spellchecker)
    
    if doParses == 'Profile':
        import profile
        profile.run('testParses(DirParser)','parse.prof')
    elif doParses == 'TestSet':
        Directions = DirectionCorpusReader(constructItemRegexp(Directors,Maps))
        parseTestSet(DirParser,Directions,TestSet,1)
    elif doParses == 'CommandLine':
        if len(sys.argv)>1 and sys.argv[1]:
            Files = sys.argv[1]
        else:
            Files = constructItemRegexp(Directors,Maps,mapversions='[01]')
        print 'Parsing', Files
        Directions = DirectionCorpusReader(Files)
        TestSet=list(Directions.items('CleanDirs'))
        #random.shuffle(TestSet);
        #timeSort(TestSet)
        parseTestSet(DirParser,Directions,TestSet,1)