Python IdSet.getNames 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Core.IdSet

클래스/타입: IdSet

메소드/함수: getNames

hotexamples.com에서의 예제들: 4

Python IdSet.getNames - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 Core.IdSet.IdSet.getNames에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

IdSet(30)

getId(27)

getName(5)

load(3)

write(3)

defineId(2)

getIds(2)

getNames(2)

예제 #1

파일 보기

class ExampleBuilder:
    structureAnalyzer = None
    """
    ExampleBuilder is the abstract base class for specialized example builders.
    Example builders take some data and convert it to examples usable by e.g. SVMs.
    An example builder writes three files, an example-file (in extended Joachim's
    SVM format) and .class_names and .feature_names files, which contain the names
    for the class and feature id-numbers. An example builder can also be given
    pre-existing sets of class and feature ids (optionally in files) so that the
    generated examples are consistent with other, previously generated examples.
    """
    def __init__(self, classSet=None, featureSet=None):
        if (type(classSet) == types.StringType):
            self.classSet = IdSet(filename=classSet)
        else:
            self.classSet = classSet

        if (type(featureSet) == types.StringType):
            self.featureSet = IdSet(filename=featureSet)
        else:
            self.featureSet = featureSet

        self.featureTag = ""
        self.exampleStats = ExampleStats()
        self.parse = None
        self.tokenization = None
        #self.idFileTag = None
        self.classIdFilename = None
        self.featureIdFilename = None

        self.styles = {}
        self._defaultParameters = None
        self._parameterValueLimits = None
        self._setDefaultParameters(["sentenceLimit"])
        self.debug = False

    def hasStyle(self, style):
        return style in self.styles and not self.styles[style]

    def _setDefaultParameters(self, defaults=None, valueLimits=None):
        # Initialize
        if self._defaultParameters == None:
            self._defaultParameters = {}
        if self._parameterValueLimits == None:
            self._parameterValueLimits = {}
        newParameters = Utils.Parameters.get({},
                                             defaults,
                                             valueLimits=valueLimits)
        self._defaultParameters.update(newParameters)
        if valueLimits != None:
            self._parameterValueLimits.update(valueLimits)

    def getParameters(self, parameters):
        return Utils.Parameters.get(parameters,
                                    defaults=self._defaultParameters,
                                    valueLimits=self._parameterValueLimits)

    def setFeature(self, name, value):
        self.features[self.featureSet.getId(self.featureTag + name)] = value

    def getElementCounts(self, filename):
        print >> sys.stderr, "Counting elements:",
        if filename.endswith(".gz"):
            f = gzip.open(filename, "rt")
        else:
            f = open(filename, "rt")
        counts = {"documents": 0, "sentences": 0}
        for line in f:
            if "<document" in line:
                counts["documents"] += 1
            elif "<sentence" in line:
                counts["sentences"] += 1
        f.close()
        print >> sys.stderr, counts
        return counts

    def saveIds(self):
        if self.classIdFilename != None:
            print >> sys.stderr, "Saving class names to", self.classIdFilename
            self.classSet.write(self.classIdFilename)
        else:
            print >> sys.stderr, "Class names not saved"
        if self.featureIdFilename != None:
            print >> sys.stderr, "Saving feature names to", self.featureIdFilename
            self.featureSet.write(self.featureIdFilename)
        else:
            print >> sys.stderr, "Feature names not saved"

    def processCorpus(self,
                      input,
                      output,
                      gold=None,
                      append=False,
                      allowNewIds=True,
                      structureAnalyzer=None):
        # Create intermediate paths if needed
        if os.path.dirname(output) != "" and not os.path.exists(
                os.path.dirname(output)):
            os.makedirs(os.path.dirname(output))
        # Open output file
        openStyle = "wt"
        if append:
            #print "Appending examples"
            openStyle = "at"
        if output.endswith(".gz"):
            outfile = gzip.open(output, openStyle)
        else:
            outfile = open(output, openStyle)

        # Build examples
        self.exampleCount = 0
        if type(input) in types.StringTypes:  # Entered here - Mu
            self.elementCounts = self.getElementCounts(input)
            if self.elementCounts["sentences"] > 0:  # Entered here, 1448 - Mu
                self.progress = ProgressCounter(
                    self.elementCounts["sentences"], "Build examples")
            else:
                self.elementCounts = None
                self.progress = ProgressCounter(None, "Build examples")
        else:
            self.elementCounts = None
            self.progress = ProgressCounter(None, "Build examples")
        # pdb.set_trace()

        # This line generates log below:(getSentences function generates the first 2 lines)
        # Making sentence graphs (GE09.d149.s5): 100.00 % (0:0:1.113)
        # Skipped 381 duplicate interaction edges in SentenceGraphs
        # Defining predicted value range: None - Mu
        self.calculatePredictedRange(
            self.getSentences(input, self.parse, self.tokenization)
        )  # self.parse: mccc; self.tokenization: None

        removeIntersentenceInteractions = True
        if "keep_intersentence" in self.styles and self.styles[
                "keep_intersentence"]:
            print >> sys.stderr, "Keeping intersentence interactions for input corpus"
            removeIntersentenceInteractions = False  # this is True  - Mu
        inputIterator = getCorpusIterator(
            input,
            None,
            self.parse,
            self.tokenization,
            removeIntersentenceInteractions=removeIntersentenceInteractions)

        # pdb.set_trace()
        #goldIterator = []
        if gold != None:  # Entered here - Mu
            removeGoldIntersentenceInteractions = True
            if "keep_intersentence_gold" in self.styles and self.styles[
                    "keep_intersentence_gold"]:
                print >> sys.stderr, "Keeping intersentence interactions for gold corpus"
                removeGoldIntersentenceInteractions = False  # this is False - Mu
            goldIterator = getCorpusIterator(
                gold,
                None,
                self.parse,
                self.tokenization,
                removeIntersentenceInteractions=
                removeGoldIntersentenceInteractions)
            for inputSentences, goldSentences in itertools.izip_longest(
                    inputIterator, goldIterator, fillvalue=None):
                assert inputSentences != None
                assert goldSentences != None
                # pdb.set_trace()
                # see the documentation of function processSentence() in this script
                # inputSentences[1].sentence is the unmerged version
                # inputSentences[1].sentenceGraph is the merged version, meaning that when generating sentenceGraph,
                # duplicated intereactions are removed(actually skipped, not added to the graph, but not really removed) - Mu
                self.processDocument(inputSentences,
                                     goldSentences,
                                     outfile,
                                     structureAnalyzer=structureAnalyzer)
        else:
            for inputSentences in inputIterator:
                self.processDocument(inputSentences,
                                     None,
                                     outfile,
                                     structureAnalyzer=structureAnalyzer)
        outfile.close()
        self.progress.endUpdate()

        # Show statistics
        print >> sys.stderr, "Examples built:", self.exampleCount
        print >> sys.stderr, "Features:", len(self.featureSet.getNames())
        print >> sys.stderr, "Classes:", len(self.classSet.getNames())
        print >> sys.stderr, "Style:", Utils.Parameters.toString(
            self.getParameters(self.styles))
        if self.exampleStats.getExampleCount() > 0:
            self.exampleStats.printStats()

        # Save Ids
        if allowNewIds:
            self.saveIds()

    def processDocument(self,
                        sentences,
                        goldSentences,
                        outfile,
                        structureAnalyzer=None):
        #calculatePredictedRange(self, sentences)
        for i in range(len(sentences)):
            sentence = sentences[i]
            goldSentence = None
            if goldSentences != None:
                goldSentence = goldSentences[i]
            self.progress.update(
                1, "Building examples (" + sentence.sentence.get("id") + "): ")
            self.processSentence(sentence,
                                 outfile,
                                 goldSentence,
                                 structureAnalyzer=structureAnalyzer)

    def processSentence(self,
                        sentence,
                        outfile,
                        goldSentence=None,
                        structureAnalyzer=None):
        '''
        sentence: Utils.InteractionXML.SentenceElements.SentenceElements instance
        sentence.sentence: Element 'sentence' in the xml file
        '''
        # pdb.set_trace()
        # Process filtering rules
        # does NOT entered here since self.styles["sentenceLimit"] is None - Mu
        if "sentenceLimit" in self.styles and self.styles[
                "sentenceLimit"]:  # Rules for limiting which sentences to process
            # Get the rule list
            limitRules = self.styles["sentenceLimit"]
            if type(limitRules) in types.StringTypes:
                limitRules = [limitRules]
            # Get the list of sentence element attribute names
            sentenceElement = sentence.sentence
            sentenceAttributes = sorted(sentenceElement.attrib.keys())
            # Filter sentences based on matching rules to their attribute values
            for rule in limitRules:
                for sentAttr in sentenceAttributes:
                    # Rule are of the form "attr.value" where "attr" is the name
                    # of the attribute to match, and "value" a substring within
                    # that attribute
                    if rule.startswith(sentAttr +
                                       "."):  # rule matches the attribute
                        value = rule.split(
                            ".", 1)[-1]  # get the value part of the rule
                        if value not in sentenceElement.get(
                                sentAttr
                        ):  # rule value must be a substring of the attribute value
                            return  # discard all sentences that do not match all rules
        # Process the sentence
        if sentence.sentenceGraph != None:
            goldGraph = None
            if goldSentence != None:
                goldGraph = goldSentence.sentenceGraph
            # c, sentenceGraph_return, argCombinations_return = self.buildExamplesFromGraph(sentence.sentenceGraph, outfile, goldGraph, structureAnalyzer=structureAnalyzer)
            # self.exampleCount += c
            self.exampleCount += self.buildExamplesFromGraph(
                sentence.sentenceGraph,
                outfile,
                goldGraph,
                structureAnalyzer=structureAnalyzer)
        # return sentenceGraph_return, argCombinations_return

    @classmethod
    def run(cls,
            input,
            output,
            parse,
            tokenization,
            style,
            classIds=None,
            featureIds=None,
            gold=None,
            append=False,
            allowNewIds=True,
            structureAnalyzer=None,
            debug=False):
        print >> sys.stderr, "Running", cls.__name__
        print >> sys.stderr, "  input:", input
        if gold != None:
            print >> sys.stderr, "  gold:", gold
        print >> sys.stderr, "  output:", output, "(append:", str(append) + ")"
        print >> sys.stderr, "  add new class/feature ids:", allowNewIds
        if not isinstance(style, types.StringTypes):
            style = Utils.Parameters.toString(style)
        print >> sys.stderr, "  style:", style
        if tokenization == None:
            print >> sys.stderr, "  parse:", parse
        else:
            print >> sys.stderr, "  parse:", parse + ", tokenization:", tokenization
        classSet, featureSet = cls.getIdSets(
            classIds, featureIds, allowNewIds)  #cls.getIdSets(idFileTag)
        builder = cls(style=style, classSet=classSet, featureSet=featureSet)
        builder.debug = debug
        #builder.idFileTag = idFileTag
        builder.classIdFilename = classIds
        builder.featureIdFilename = featureIds
        builder.parse = parse
        builder.tokenization = tokenization
        builder.processCorpus(input,
                              output,
                              gold,
                              append=append,
                              allowNewIds=allowNewIds,
                              structureAnalyzer=structureAnalyzer)
        return builder

    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None):
        raise NotImplementedError

    def definePredictedValueRange(self, sentences, elementName):
        pass

    def getPredictedValueRange(self):
        return None

    @classmethod
    def getIdSets(self, classIds=None, featureIds=None, allowNewIds=True):
        # Class ids
        #print classIds
        #print featureIds
        if classIds != None and os.path.exists(classIds):
            print >> sys.stderr, "Using predefined class names from", classIds
            classSet = IdSet(allowNewIds=allowNewIds)
            classSet.load(classIds)
        else:
            print >> sys.stderr, "No predefined class names"
            classSet = None
        # Feature ids
        if featureIds != None and os.path.exists(featureIds):
            print >> sys.stderr, "Using predefined feature names from", featureIds
            featureSet = IdSet(allowNewIds=allowNewIds)
            featureSet.load(featureIds)
        else:
            print >> sys.stderr, "No predefined feature names"
            featureSet = None
        return classSet, featureSet


#        if idFileTag != None and os.path.exists(idFileTag + ".feature_names.gz") and os.path.exists(idFileTag + ".class_names"):
#            print >> sys.stderr, "Using predefined class and feature names"
#            featureSet = IdSet()
#            featureSet.load(idFileTag + ".feature_names.gz")
#            classSet = IdSet()
#            classSet.load(idFileTag + ".class_names")
#            return classSet, featureSet
#        else:
#            print >> sys.stderr, "No predefined class or feature-names"
#            if idFileTag != None:
#                assert(not os.path.exists(idFileTag + ".feature_names.gz")), idFileTag
#                assert(not os.path.exists(idFileTag + ".class_names")), idFileTag
#            return None, None

    def getSentences(self, input, parse, tokenization, removeNameInfo=False):
        # pdb.set_trace()
        # input is the path to the corpus xml file
        if type(input) != types.ListType:  # Program entered here - Mu
            # Load corpus and make sentence graphs
            # pdb.set_trace()
            corpusElements = Core.SentenceGraph.loadCorpus(
                input, parse, tokenization, removeNameInfo=removeNameInfo)
            sentences = []
            for sentence in corpusElements.sentences:
                if sentence.sentenceGraph != None:  # required for event detection
                    sentences.append([sentence.sentenceGraph, None])
            return sentences
        else:  # assume input is already a list of sentences
            assert (removeNameInfo == False)
            return input

    def calculatePredictedRange(self, sentences):
        print >> sys.stderr, "Defining predicted value range:",
        sentenceElements = []
        for sentence in sentences:
            sentenceElements.append(sentence[0].sentenceElement)
        self.definePredictedValueRange(sentenceElements, "entity")
        print >> sys.stderr, self.getPredictedValueRange()

예제 #2

파일 보기

파일: ExampleBuilder.py 프로젝트: DUT-LiuYang/TEES

class ExampleBuilder:
    structureAnalyzer = None
    """ 
    ExampleBuilder is the abstract base class for specialized example builders.
    Example builders take some data and convert it to examples usable by e.g. SVMs.
    An example builder writes three files, an example-file (in extended Joachim's
    SVM format) and .class_names and .feature_names files, which contain the names
    for the class and feature id-numbers. An example builder can also be given
    pre-existing sets of class and feature ids (optionally in files) so that the
    generated examples are consistent with other, previously generated examples.
    """    
    def __init__(self, classSet=None, featureSet=None):
        if(type(classSet) == types.StringType):
            self.classSet = IdSet(filename=classSet)
        else:
            self.classSet = classSet
        
        if(type(featureSet) == types.StringType):
            self.featureSet = IdSet(filename=featureSet)
        else:
            self.featureSet = featureSet
        
        self.featureTag = ""      
        self.exampleStats = ExampleStats()
        self.parse = None
        self.tokenization = None
        #self.idFileTag = None
        self.classIdFilename = None
        self.featureIdFilename = None
        
        self.styles = None
        self._defaultParameters = None
        self._parameterValueLimits = None
        self._setDefaultParameters(["sentenceLimit"])
        self.debug = False
    
    def _setDefaultParameters(self, defaults=None, valueLimits=None):
        # Initialize
        if self._defaultParameters == None:
            self._defaultParameters = {}
        if self._parameterValueLimits == None:
            self._parameterValueLimits = {}
        newParameters = Utils.Parameters.get({}, defaults, valueLimits=valueLimits)
        self._defaultParameters.update(newParameters)
        if valueLimits != None:
            self._parameterValueLimits.update(valueLimits)
    
    def getParameters(self, parameters):
        return Utils.Parameters.get(parameters, defaults=self._defaultParameters, valueLimits=self._parameterValueLimits)
    
    def setFeature(self, name, value):
        self.features[self.featureSet.getId(self.featureTag+name)] = value
    
    def getElementCounts(self, filename):
        print >> sys.stderr, "Counting elements:",
        if filename.endswith(".gz"):
            f = gzip.open(filename, "rt")
        else:
            f = open(filename, "rt")
        counts = {"documents":0, "sentences":0}
        for line in f:
            if "<document" in line:
                counts["documents"] += 1
            elif "<sentence" in line:
                counts["sentences"] += 1
        f.close()
        print >> sys.stderr, counts
        return counts

    def saveIds(self):
        if self.classIdFilename != None:
            print >> sys.stderr, "Saving class names to", self.classIdFilename
            self.classSet.write(self.classIdFilename)
        else:
            print >> sys.stderr, "Class names not saved"
        if self.featureIdFilename != None:
            print >> sys.stderr, "Saving feature names to", self.featureIdFilename
            self.featureSet.write(self.featureIdFilename)
        else:
            print >> sys.stderr, "Feature names not saved"

    def processCorpus(self, input, output, gold=None, append=False, allowNewIds=True, structureAnalyzer=None):
        # Create intermediate paths if needed
        if os.path.dirname(output) != "" and not os.path.exists(os.path.dirname(output)):
            os.makedirs(os.path.dirname(output))
        # Open output file
        openStyle = "wt"
        if append:
            #print "Appending examples"
            openStyle = "at"
        if output.endswith(".gz"):
            outfile = gzip.open(output, openStyle)
        else:
            outfile = open(output, openStyle)
        
        # Build examples
        self.exampleCount = 0
        if type(input) in types.StringTypes:
            self.elementCounts = self.getElementCounts(input)
            if self.elementCounts["sentences"] > 0:
                self.progress = ProgressCounter(self.elementCounts["sentences"], "Build examples")
            else:
                self.elementCounts = None
                self.progress = ProgressCounter(None, "Build examples")
        else:
            self.elementCounts = None
            self.progress = ProgressCounter(None, "Build examples")
        
        self.calculatePredictedRange(self.getSentences(input, self.parse, self.tokenization))
        
        removeIntersentenceInteractions = True
        if "keep_intersentence" in self.styles and self.styles["keep_intersentence"]:
            print >> sys.stderr, "Keeping intersentence interactions for input corpus"
            removeIntersentenceInteractions = False
        inputIterator = getCorpusIterator(input, None, self.parse, self.tokenization, removeIntersentenceInteractions=removeIntersentenceInteractions)            
        
        #goldIterator = []
        if gold != None:
            removeGoldIntersentenceInteractions = True
            if "keep_intersentence_gold" in self.styles and self.styles["keep_intersentence_gold"]:
                print >> sys.stderr, "Keeping intersentence interactions for gold corpus"
                removeGoldIntersentenceInteractions = False
            goldIterator = getCorpusIterator(gold, None, self.parse, self.tokenization, removeIntersentenceInteractions=removeGoldIntersentenceInteractions)
            for inputSentences, goldSentences in itertools.izip_longest(inputIterator, goldIterator, fillvalue=None):
                assert inputSentences != None
                assert goldSentences != None
                self.processDocument(inputSentences, goldSentences, outfile, structureAnalyzer=structureAnalyzer)
        else:
            for inputSentences in inputIterator:
                self.processDocument(inputSentences, None, outfile, structureAnalyzer=structureAnalyzer)
        outfile.close()
        self.progress.endUpdate()
        
        # Show statistics
        print >> sys.stderr, "Examples built:", self.exampleCount
        print >> sys.stderr, "Features:", len(self.featureSet.getNames())
        print >> sys.stderr, "Style:", Utils.Parameters.toString(self.getParameters(self.styles))
        if self.exampleStats.getExampleCount() > 0:
            self.exampleStats.printStats()
    
        # Save Ids
        if allowNewIds:
            self.saveIds()
    
    def processDocument(self, sentences, goldSentences, outfile, structureAnalyzer=None):
        #calculatePredictedRange(self, sentences)            
        for i in range(len(sentences)):
            sentence = sentences[i]
            goldSentence = None
            if goldSentences != None:
                goldSentence = goldSentences[i]
            self.progress.update(1, "Building examples ("+sentence.sentence.get("id")+"): ")
            self.processSentence(sentence, outfile, goldSentence, structureAnalyzer=structureAnalyzer)
    
    def processSentence(self, sentence, outfile, goldSentence=None, structureAnalyzer=None):
        # Process filtering rules
        if self.styles["sentenceLimit"]: # Rules for limiting which sentences to process
            # Get the rule list
            limitRules = self.styles["sentenceLimit"]
            if type(limitRules) in types.StringTypes:
                limitRules = [limitRules]
            # Get the list of sentence element attribute names
            sentenceElement = sentence.sentence
            sentenceAttributes = sorted(sentenceElement.attrib.keys())
            # Filter sentences based on matching rules to their attribute values
            for rule in limitRules:
                for sentAttr in sentenceAttributes:
                    # Rule are of the form "attr.value" where "attr" is the name
                    # of the attribute to match, and "value" a substring within
                    # that attribute
                    if rule.startswith(sentAttr + "."): # rule matches the attribute
                        value = rule.split(".", 1)[-1] # get the value part of the rule
                        if value not in sentenceElement.get(sentAttr): # rule value must be a substring of the attribute value
                            return # discard all sentences that do not match all rules
        # Process the sentence
        if sentence.sentenceGraph != None:
            goldGraph = None
            if goldSentence != None:
                goldGraph = goldSentence.sentenceGraph
            self.exampleCount += self.buildExamplesFromGraph(sentence.sentenceGraph, outfile, goldGraph, structureAnalyzer=structureAnalyzer)

    @classmethod
    def run(cls, input, output, parse, tokenization, style, classIds=None, featureIds=None, gold=None, append=False, allowNewIds=True, structureAnalyzer=None, debug=False):
        print >> sys.stderr, "Running", cls.__name__
        print >> sys.stderr, "  input:", input
        if gold != None:
            print >> sys.stderr, "  gold:", gold
        print >> sys.stderr, "  output:", output, "(append:", str(append) + ")"
        print >> sys.stderr, "  add new class/feature ids:", allowNewIds
        if not isinstance(style, types.StringTypes):
            style = Utils.Parameters.toString(style)
        print >> sys.stderr, "  style:", style
        if tokenization == None: 
            print >> sys.stderr, "  parse:", parse
        else:
            print >> sys.stderr, "  parse:", parse + ", tokenization:", tokenization
        classSet, featureSet = cls.getIdSets(classIds, featureIds, allowNewIds) #cls.getIdSets(idFileTag)
        builder = cls(style=style, classSet=classSet, featureSet=featureSet)
        builder.debug = debug
        #builder.idFileTag = idFileTag
        builder.classIdFilename = classIds
        builder.featureIdFilename = featureIds
        builder.parse = parse ; builder.tokenization = tokenization
        builder.processCorpus(input, output, gold, append=append, allowNewIds=allowNewIds, structureAnalyzer=structureAnalyzer)
        return builder

    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None):
        raise NotImplementedError
    
    def definePredictedValueRange(self, sentences, elementName):
        pass
    
    def getPredictedValueRange(self):
        return None
    
    @classmethod
    def getIdSets(self, classIds=None, featureIds=None, allowNewIds=True):
        # Class ids
        #print classIds
        #print featureIds
        if classIds != None and os.path.exists(classIds):
            print >> sys.stderr, "Using predefined class names from", classIds
            classSet = IdSet(allowNewIds=allowNewIds)
            classSet.load(classIds)
        else:
            print >> sys.stderr, "No predefined class names"
            classSet = None
        # Feature ids
        if featureIds != None and os.path.exists(featureIds):
            print >> sys.stderr, "Using predefined feature names from", featureIds
            featureSet = IdSet(allowNewIds=allowNewIds)
            featureSet.load(featureIds)
        else:
            print >> sys.stderr, "No predefined feature names"
            featureSet = None
        return classSet, featureSet
        
#        if idFileTag != None and os.path.exists(idFileTag + ".feature_names.gz") and os.path.exists(idFileTag + ".class_names"):
#            print >> sys.stderr, "Using predefined class and feature names"
#            featureSet = IdSet()
#            featureSet.load(idFileTag + ".feature_names.gz")
#            classSet = IdSet()
#            classSet.load(idFileTag + ".class_names")
#            return classSet, featureSet
#        else:
#            print >> sys.stderr, "No predefined class or feature-names"
#            if idFileTag != None:
#                assert(not os.path.exists(idFileTag + ".feature_names.gz")), idFileTag
#                assert(not os.path.exists(idFileTag + ".class_names")), idFileTag
#            return None, None


    def getSentences(self, input, parse, tokenization, removeNameInfo=False):
        if type(input) != types.ListType:
            # Load corpus and make sentence graphs
            corpusElements = Core.SentenceGraph.loadCorpus(input, parse, tokenization, removeNameInfo=removeNameInfo)
            sentences = []
            for sentence in corpusElements.sentences:
                if sentence.sentenceGraph != None: # required for event detection
                    sentences.append( [sentence.sentenceGraph,None] )
            return sentences
        else: # assume input is already a list of sentences
            assert(removeNameInfo == False)
            return input

    def calculatePredictedRange(self, sentences):
        print >> sys.stderr, "Defining predicted value range:",
        sentenceElements = []
        for sentence in sentences:
            sentenceElements.append(sentence[0].sentenceElement)
        self.definePredictedValueRange(sentenceElements, "entity")
        print >> sys.stderr, self.getPredictedValueRange()

예제 #3

파일 보기

파일: MultiLabelClassifier.py 프로젝트: jbjorne/Tdevel

 def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False, classIds=None): # , timeout=None):
     """
     Classify examples with a pre-trained model.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type modelPath: string
     @param modelPath: filename of the pre-trained model file
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type output: string
     @param output: the name of the predictions file to be written
     @type forceInternal: Boolean
     @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
     """
     if type(parameters) == types.StringType:
         parameters = splitParameters(parameters)
     timer = Timer()
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
         examples, predictions = self.filterClassificationSet(examples, False)
         testPath = self.tempDir+"/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
         testPath = examples
         examples = Example.readExamples(examples,False)
     if parameters != None:
         parameters = copy.copy(parameters)
         if parameters.has_key("c"):
             del parameters["c"]
         if parameters.has_key("predefined"):
             parameters = copy.copy(parameters)
             modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
             del parameters["predefined"]
     # Read model
     if modelPath == None:
         modelPath = "model-multilabel"
     classModels = {}
     if modelPath.endswith(".gz"):
         f = gzip.open(modelPath, "rt")
     else:
         f = open(modelPath, "rt")
     thresholds = {}
     for line in f:
         key, value, threshold = line.split()
         classModels[key] = value
         if threshold != "None":
             thresholds[key] = float(threshold)
         else:
             thresholds[key] = 0.0
     f.close()
     mergedPredictions = []
     if type(classIds) == types.StringType:
         classIds = IdSet(filename=classIds)
     #print classModels
     print "Thresholds", thresholds
     classifierBin = Settings.SVMMultiClassDir+"/svm_multiclass_classify"
     print parameters
     if "classifier" in parameters and "svmperf" in parameters["classifier"]:
         classifierBin = Settings.SVMPerfDir+"/svm_perf_classify"
         parameters = copy.copy(parameters)
         del parameters["classifier"]
     for className in classIds.getNames():
         if className != "neg" and not "---" in className:
             classId = classIds.getId(className)
             if thresholds[str(className)] != 0.0:
                 print >> sys.stderr, "Classifying", className, "with threshold", thresholds[str(className)]
             else:
                 print >> sys.stderr, "Classifying", className
             args = [classifierBin]
             #self.__addParametersToSubprocessCall(args, parameters)
             classOutput = "predictions" + ".cls-" + className
             logFile = open("svmmulticlass" + ".cls-" + className + ".log","at")
             args += [testPath, classModels[str(className)], classOutput]
             print args
             subprocess.call(args, stdout = logFile, stderr = logFile)
             cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)])
     print >> sys.stderr, timer.toString()
     
     predFileName = output
     f = open(predFileName, "wt")
     for mergedPred in mergedPredictions:
         if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
             mergedPred[0].remove("1")
         mergedPred[1] = str(mergedPred[1])
         mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
         f.write(" ".join(mergedPred) + "\n")
     f.close()
     
     return mergedPredictions

예제 #4

파일 보기

    def test(cls,
             examples,
             modelPath,
             output=None,
             parameters=None,
             forceInternal=False,
             classIds=None):  # , timeout=None):
        """
        Classify examples with a pre-trained model.
        
        @type examples: string (filename) or list (or iterator) of examples
        @param examples: a list or file containing examples in SVM-format
        @type modelPath: string
        @param modelPath: filename of the pre-trained model file
        @type parameters: a dictionary or string
        @param parameters: parameters for the classifier
        @type output: string
        @param output: the name of the predictions file to be written
        @type forceInternal: Boolean
        @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
        """
        if type(parameters) == types.StringType:
            parameters = splitParameters(parameters)
        timer = Timer()
        if type(examples) == types.ListType:
            print >> sys.stderr, "Classifying", len(
                examples), "with SVM-MultiClass model", modelPath
            examples, predictions = self.filterClassificationSet(
                examples, False)
            testPath = self.tempDir + "/test.dat"
            Example.writeExamples(examples, testPath)
        else:
            print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
            testPath = examples
            examples = Example.readExamples(examples, False)
        if parameters != None:
            parameters = copy.copy(parameters)
            if parameters.has_key("c"):
                del parameters["c"]
            if parameters.has_key("predefined"):
                parameters = copy.copy(parameters)
                modelPath = os.path.join(parameters["predefined"][0],
                                         "classifier/model")
                del parameters["predefined"]
        # Read model
        if modelPath == None:
            modelPath = "model-multilabel"
        classModels = {}
        if modelPath.endswith(".gz"):
            f = gzip.open(modelPath, "rt")
        else:
            f = open(modelPath, "rt")
        thresholds = {}
        for line in f:
            key, value, threshold = line.split()
            classModels[key] = value
            if threshold != "None":
                thresholds[key] = float(threshold)
            else:
                thresholds[key] = 0.0
        f.close()
        mergedPredictions = []
        if type(classIds) == types.StringType:
            classIds = IdSet(filename=classIds)
        #print classModels
        print "Thresholds", thresholds
        classifierBin = Settings.SVMMultiClassDir + "/svm_multiclass_classify"
        print parameters
        if "classifier" in parameters and "svmperf" in parameters["classifier"]:
            classifierBin = Settings.SVMPerfDir + "/svm_perf_classify"
            parameters = copy.copy(parameters)
            del parameters["classifier"]
        for className in classIds.getNames():
            if className != "neg" and not "---" in className:
                classId = classIds.getId(className)
                if thresholds[str(className)] != 0.0:
                    print >> sys.stderr, "Classifying", className, "with threshold", thresholds[
                        str(className)]
                else:
                    print >> sys.stderr, "Classifying", className
                args = [classifierBin]
                #self.__addParametersToSubprocessCall(args, parameters)
                classOutput = "predictions" + ".cls-" + className
                logFile = open("svmmulticlass" + ".cls-" + className + ".log",
                               "at")
                args += [testPath, classModels[str(className)], classOutput]
                print args
                subprocess.call(args, stdout=logFile, stderr=logFile)
                cls.addPredictions(classOutput,
                                   mergedPredictions,
                                   classId,
                                   len(classIds.Ids),
                                   threshold=thresholds[str(className)])
        print >> sys.stderr, timer.toString()

        predFileName = output
        f = open(predFileName, "wt")
        for mergedPred in mergedPredictions:
            if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
                mergedPred[0].remove("1")
            mergedPred[1] = str(mergedPred[1])
            mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
            f.write(" ".join(mergedPred) + "\n")
        f.close()

        return mergedPredictions