Example #1
0
    def _generatePickledLexicalUnitsIndex(self):
        """
        Build the lexical-unit index and save it as a pickle.

        Every file in FRAMENET_PATH/LU_DIR_ENV whose name starts with 'lu'
        and ends with '.xml' (compared case-insensitively) is loaded as a
        LexicalUnit. The resulting index maps each lexical unit name to a
        dictionary whose keys are the IDs of the units carrying that name
        (the values are always 1, i.e. the inner dictionary is used as a set).

        The index is written to FRAMENET_PATH/PICKLED_LU_FILE and progress
        messages are printed to stderr. Nothing is returned.
        """

        baseDir = FRAMENET_PATH + '/' + LU_DIR_ENV
        pickledLUPath = FRAMENET_PATH + '/' + PICKLED_LU_FILE

        lexicalUnitsIndexByName = {}
        for _f in os.listdir(baseDir):
            lowered = _f.lower()
            if not (lowered.startswith('lu') and lowered.endswith('.xml')):
                continue

            print >> sys.stderr, 'Loading:', _f, ": ",
            lu = LexicalUnit()
            lu.loadXML(baseDir + '/' + _f)
            print >> sys.stderr, lu['name'], '...',
            print >> sys.stderr, 'done'

            # Several lexical units may share one name, so group IDs by name.
            lexicalUnitsIndexByName.setdefault(lu['name'], {})[lu['ID']] = 1

        print >> sys.stderr, 'Saving the pickled lu files ...',
        # HIGHEST_PROTOCOL is a binary format, so the file must be opened in
        # binary mode; text mode corrupts the data on platforms that
        # translate newlines. The with-block also guarantees the handle is
        # flushed and closed even when dumping fails.
        with open(pickledLUPath, 'wb') as pickledFile:
            cPickle.dump(lexicalUnitsIndexByName, pickledFile,
                         cPickle.HIGHEST_PROTOCOL)
        print >> sys.stderr, 'done'
Example #2
0
    def _generatePickledLexicalUnitsIndex(self):
        """
        Build the lexical-unit index and save it as a pickle.

        Every file in FRAMENET_PATH/LU_DIR_ENV whose name starts with 'lu'
        and ends with '.xml' (compared case-insensitively) is loaded as a
        LexicalUnit. The resulting index maps each lexical unit name to a
        dictionary whose keys are the IDs of the units carrying that name
        (the values are always 1, i.e. the inner dictionary is used as a set).

        The index is written to FRAMENET_PATH/PICKLED_LU_FILE and progress
        messages are printed to stderr. Nothing is returned.
        """

        baseDir = FRAMENET_PATH + '/' + LU_DIR_ENV
        pickledLUPath = FRAMENET_PATH + '/' + PICKLED_LU_FILE

        lexicalUnitsIndexByName = {}
        for _f in os.listdir(baseDir):
            lowered = _f.lower()
            if not (lowered.startswith('lu') and lowered.endswith('.xml')):
                continue

            print >> sys.stderr, 'Loading:', _f, ": ",
            lu = LexicalUnit()
            lu.loadXML(baseDir + '/' + _f)
            print >> sys.stderr, lu['name'], '...',
            print >> sys.stderr, 'done'

            # Several lexical units may share one name, so group IDs by name.
            lexicalUnitsIndexByName.setdefault(lu['name'], {})[lu['ID']] = 1

        print >> sys.stderr, 'Saving the pickled lu files ...',
        # HIGHEST_PROTOCOL is a binary format, so the file must be opened in
        # binary mode; text mode corrupts the data on platforms that
        # translate newlines. The with-block also guarantees the handle is
        # flushed and closed even when dumping fails.
        with open(pickledLUPath, 'wb') as pickledFile:
            cPickle.dump(lexicalUnitsIndexByName, pickledFile,
                         cPickle.HIGHEST_PROTOCOL)
        print >> sys.stderr, 'done'
Example #3
0
def testLU(fileName):
    a = LexicalUnit()

    try:
        if not a.loadXML(FRAMENET_PATH+"/"+LU_DIR_ENV+"/"+fileName):
            print >> sys.stderr, 'loading:', fileName, 'failed'
    except:
        print >> sys.stderr, 'loading:', fileName, 'failed'
    print a.getLexemes()
    return a
Example #4
0
def testLU(fileName):
    a = LexicalUnit()

    try:
        if not a.loadXML(FRAMENET_PATH + "/" + LU_DIR_ENV + "/" + fileName):
            print >> sys.stderr, 'loading:', fileName, 'failed'
    except:
        print >> sys.stderr, 'loading:', fileName, 'failed'
    print a.getLexemes()
    return a
Example #5
0
    def lookupLexicalUnit(self, headWord, pos):
        """
        Look up the lexical units matching a (headWord, pos) pair.

        The word must be already lemmatised and in lower case. The pos must
        also be in lower case and it can be one of the following:
        (1) v  -- for verb
        (2) n  -- for noun
        (3) a  -- for adjective
        (4) adv -- for adverb
        (5) prep -- for preposition
        (6) num -- for numbers
        (7) intj -- for interjections

        The lookup key is headWord + '.' + pos. It is checked against
        self['luCache'] first and then against self['luIndex']; only on a
        cache miss with an index hit are the matching
        FRAMENET_PATH/LU_DIR_ENV/lu<ID>.xml files loaded.

        Returns a dictionary whose keys are the IDs of the matching lexical
        units and whose values are LexicalUnit objects. An empty dictionary
        is returned when the key is not in the index. Loaded results are
        stored in self['luCache'], so a repeated lookup returns the same
        dictionary object.
        """

        w = headWord + '.' + pos

        luCache = self['luCache']
        if w in luCache:
            return luCache[w]

        luIndex = self['luIndex']
        if w not in luIndex:
            # Unknown keys are not cached; each miss returns a fresh dict.
            return {}

        # Directory holding the per-unit XML files (one 'lu<ID>.xml' each).
        luDir = FRAMENET_PATH + '/' + LU_DIR_ENV

        objects = {}
        for _id in luIndex[w]:
            lu = LexicalUnit()
            lu.loadXML(luDir + '/lu' + str(_id) + '.xml')
            # Keyed by the ID the loaded unit reports for itself.
            objects[lu['ID']] = lu

        luCache[w] = objects
        return objects
Example #6
0
    def lookupLexicalUnit(self, headWord, pos):
        """
        Look up the lexical units matching a (headWord, pos) pair.

        The word must be already lemmatised and in lower case. The pos must
        also be in lower case and it can be one of the following:
        (1) v  -- for verb
        (2) n  -- for noun
        (3) a  -- for adjective
        (4) adv -- for adverb
        (5) prep -- for preposition
        (6) num -- for numbers
        (7) intj -- for interjections

        The lookup key is headWord + '.' + pos. It is checked against
        self['luCache'] first and then against self['luIndex']; only on a
        cache miss with an index hit are the matching
        FRAMENET_PATH/LU_DIR_ENV/lu<ID>.xml files loaded.

        Returns a dictionary whose keys are the IDs of the matching lexical
        units and whose values are LexicalUnit objects. An empty dictionary
        is returned when the key is not in the index. Loaded results are
        stored in self['luCache'], so a repeated lookup returns the same
        dictionary object.
        """

        w = headWord + '.' + pos

        luCache = self['luCache']
        if w in luCache:
            return luCache[w]

        luIndex = self['luIndex']
        if w not in luIndex:
            # Unknown keys are not cached; each miss returns a fresh dict.
            return {}

        # Directory holding the per-unit XML files (one 'lu<ID>.xml' each).
        luDir = FRAMENET_PATH + '/' + LU_DIR_ENV

        objects = {}
        for _id in luIndex[w]:
            lu = LexicalUnit()
            lu.loadXML(luDir + '/lu' + str(_id) + '.xml')
            # Keyed by the ID the loaded unit reports for itself.
            objects[lu['ID']] = lu

        luCache[w] = objects
        return objects