def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None, exampleStyle=None, structureAnalyzer=None):
    """
    Writes task 3 examples to interaction XML. Assumes task 3 classification
    is done with SVMMulticlass Classifier, used for two classes.
    """
    print >> sys.stderr, "Adding task 3 to Interaction XML"
    examples, predictions = self.loadExamples(examples, predictions)

    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = None
    if classSet != None:
        classIds = classSet.getIds()

    corpusTree = ETUtils.ETFromObj(corpus)
    corpusRoot = corpusTree.getroot()

    # Determine subtask
    task3Type = None
    for example in examples:
        assert example[3].has_key("t3type")
        task3Type = example[3]["t3type"]
        break
    if task3Type == None:
        if outputFile != None:
            print >> sys.stderr, "Writing corpus to", outputFile
            ETUtils.write(corpusRoot, outputFile)
        return corpusTree
    assert task3Type in ["multiclass", "speculation", "negation"]

    # Remove the task 3 subtask information if it already exists
    for entity in corpusRoot.getiterator("entity"):
        if task3Type == "multiclass":
            entity.set("speculation", "False")
            entity.set("negation", "False")
        elif task3Type == "speculation":
            entity.set("speculation", "False")
        else: # task3Type == "negation"
            entity.set("negation", "False")

    specMap = {}
    negMap = {}
    for example, prediction in itertools.izip(examples, predictions):
        assert example[3]["xtype"] == "task3"
        if example[3]["t3type"] == "multiclass":
            if isinstance(prediction, dict):
                encoded = prediction["prediction"]
                predictedModifiers = [classSet.getName(i) for i in range(len(encoded)) if encoded[i] == 1]
            else:
                predictedClassName = classSet.getName(prediction[0])
                predictedModifiers = ""
                if predictedClassName != "neg":
                    predictedModifiers = predictedClassName.split("---")
            if "negation" in predictedModifiers:
                assert not negMap.has_key(example[3]["entity"])
                negMap[example[3]["entity"]] = (True, prediction)
            if "speculation" in predictedModifiers:
                assert not specMap.has_key(example[3]["entity"])
                specMap[example[3]["entity"]] = (True, prediction)
        else:
            if example[3]["t3type"] == "speculation":
                map = specMap
            else:
                map = negMap
            if prediction[0] != 1:
                assert not map.has_key(example[3]["entity"])
                map[example[3]["entity"]] = (True, prediction)
            else:
                assert not map.has_key(example[3]["entity"])
                map[example[3]["entity"]] = (False, prediction)

    for entity in corpusRoot.getiterator("entity"):
        eId = entity.get("id")
        if task3Type == "multiclass":
            if specMap.has_key(eId):
                entity.set("speculation", str(specMap[eId][0]))
                entity.set("modConf", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds))
            if negMap.has_key(eId):
                entity.set("negation", str(negMap[eId][0]))
                entity.set("modConf", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds))
        else:
            if task3Type == "speculation":
                if specMap.has_key(eId):
                    entity.set("speculation", str(specMap[eId][0]))
                    entity.set("specConf", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds, [""]))
            elif task3Type == "negation":
                if negMap.has_key(eId):
                    entity.set("negation", str(negMap[eId][0]))
                    entity.set("negConf", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds, ["", "speculation"]))

    # Write corpus
    if outputFile != None:
        print >> sys.stderr, "Writing corpus to", outputFile
        ETUtils.write(corpusRoot, outputFile)
    return corpusTree

def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None, exampleStyle=None):
    """
    Writes task 3 examples to interaction XML. Assumes task 3 classification
    is done with SVMMulticlass Classifier, used for two classes.
    """
    print >> sys.stderr, "Adding task 3 to Interaction XML"
    examples, predictions = self.loadExamples(examples, predictions)

    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = None
    if classSet != None:
        classIds = classSet.getIds()

    corpusTree = ETUtils.ETFromObj(corpus)
    corpusRoot = corpusTree.getroot()

    # Determine subtask
    task3Type = None
    for example in examples:
        assert example[3].has_key("t3type")
        task3Type = example[3]["t3type"]
        break
    if task3Type == None:
        if outputFile != None:
            print >> sys.stderr, "Writing corpus to", outputFile
            ETUtils.write(corpusRoot, outputFile)
        return corpusTree
    assert task3Type in ["multiclass", "speculation", "negation"]

    # Remove the task 3 subtask information if it already exists
    for entity in corpusRoot.getiterator("entity"):
        if task3Type == "multiclass":
            entity.set("speculation", "False")
            entity.set("negation", "False")
        elif task3Type == "speculation":
            entity.set("speculation", "False")
        else: # task3Type == "negation"
            entity.set("negation", "False")

    specMap = {}
    negMap = {}
    for example, prediction in itertools.izip(examples, predictions):
        assert example[3]["xtype"] == "task3"
        if example[3]["t3type"] == "multiclass":
            predictedClassName = classSet.getName(prediction[0])
            if predictedClassName != "neg":
                predictedModifiers = predictedClassName.split("---")
                if "negation" in predictedModifiers:
                    assert not negMap.has_key(example[3]["entity"])
                    negMap[example[3]["entity"]] = (True, prediction)
                if "speculation" in predictedModifiers:
                    assert not specMap.has_key(example[3]["entity"])
                    specMap[example[3]["entity"]] = (True, prediction)
        else:
            if example[3]["t3type"] == "speculation":
                map = specMap
            else:
                map = negMap
            if prediction[0] != 1:
                assert not map.has_key(example[3]["entity"])
                map[example[3]["entity"]] = (True, prediction)
            else:
                assert not map.has_key(example[3]["entity"])
                map[example[3]["entity"]] = (False, prediction)

    for entity in corpusRoot.getiterator("entity"):
        eId = entity.get("id")
        if task3Type == "multiclass":
            if specMap.has_key(eId):
                entity.set("speculation", str(specMap[eId][0]))
                entity.set("modPred", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds))
            if negMap.has_key(eId):
                entity.set("negation", str(negMap[eId][0]))
                entity.set("modPred", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds))
        else:
            if task3Type == "speculation":
                if specMap.has_key(eId):
                    entity.set("speculation", str(specMap[eId][0]))
                    entity.set("specPred", self.getPredictionStrengthString(specMap[eId][1], classSet, classIds, [""]))
            elif task3Type == "negation":
                if negMap.has_key(eId):
                    entity.set("negation", str(negMap[eId][0]))
                    entity.set("negPred", self.getPredictionStrengthString(negMap[eId][1], classSet, classIds, ["", "speculation"]))

    # Write corpus
    if outputFile != None:
        print >> sys.stderr, "Writing corpus to", outputFile
        ETUtils.write(corpusRoot, outputFile)
    return corpusTree

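# --------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not part of the original module).
# Assuming "writer" is an instance of the class defining the task 3 writeXML
# variants above, and that classified task 3 examples, their predictions and
# an interaction XML corpus are available on disk, the call would look
# roughly as follows. All file names below are hypothetical placeholders.
#
#     xmlTree = writer.writeXML("task3.examples.gz", "task3.predictions.gz",
#                               corpus="devel-events.xml",
#                               outputFile="devel-events-task3.xml",
#                               classSet="task3.class_names")
#
# The returned ElementTree can be processed further in memory; writing the
# output file is skipped when outputFile is None.
# --------------------------------------------------------------------------
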
def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None):
    #print >> sys.stderr, "Writing output to Interaction XML"
    corpus = self.loadCorpus(corpus, parse, tokenization)
    if goldCorpus != None:
        goldCorpus = self.loadCorpus(goldCorpus, parse, tokenization) # load the gold standard corpus separately
    examples, predictions = self.loadExamples(examples, predictions)

    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = None
    if classSet != None:
        classIds = classSet.getIds()

    #counter = ProgressCounter(len(corpus.sentences), "Write Examples")

    exampleQueue = [] # One sentence's examples
    predictionsByExample = {}
    currentMajorId = None
    prevMajorIds = set()
    processedSentenceIds = set()
    xType = None

    # Count the examples to initialize the progress counter
    count = 0
    for example in examples:
        count += 1
    assert count > 0
    progress = ProgressCounter(count, "Write Examples")

    # Group examples by sentence (major id) and write each sentence's queue at once
    for example, prediction in itertools.izip_longest(examples, predictions):
        assert example != None
        assert prediction != None
        majorId, minorId = example[0].rsplit(".x", 1)
        #if currentMajorId == "GENIA.d114.s9": print "Start"
        if majorId != currentMajorId: # new sentence
            if currentMajorId != None:
                #if currentMajorId == "GENIA.d114.s9": print "JAA"
                processedSentenceIds.add(currentMajorId)
                sentenceObject = corpus.sentencesById[currentMajorId]
                goldSentence = None
                if goldCorpus != None:
                    goldSentence = goldCorpus.sentencesById[currentMajorId]
                self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence) # process queue
                progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
            exampleQueue = []
            predictionsByExample = {}
            prevMajorIds.add(currentMajorId)
            assert majorId not in prevMajorIds, majorId
            currentMajorId = majorId
        exampleQueue.append(example) # queue example
        predictionsByExample[example[0]] = prediction
        assert example[3]["xtype"] == self.xType, str(example[3]["xtype"]) + "/" + str(self.xType)

    # Process what is still in queue
    if currentMajorId != None:
        processedSentenceIds.add(currentMajorId)
        sentenceObject = corpus.sentencesById[currentMajorId]
        goldSentence = None
        if goldCorpus != None:
            goldSentence = goldCorpus.sentencesById[currentMajorId]
        self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence) # process queue
        progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
        exampleQueue = []
        predictionsByExample = {}

    # Process sentences with no examples (e.g. to clear interactions)
    for sentenceId in sorted(corpus.sentencesById.keys()):
        if sentenceId not in processedSentenceIds:
            sentenceObject = corpus.sentencesById[sentenceId]
            goldSentence = None
            if goldCorpus != None:
                goldSentence = goldCorpus.sentencesById[sentenceId]
            self.writeXMLSentence([], {}, sentenceObject, classSet, classIds, goldSentence=goldSentence)

    # Print statistics
    if len(self.counts) > 0:
        print >> sys.stderr, self.counts
        self.counts = defaultdict(int)

    # Write corpus
    if outputFile != None:
        print >> sys.stderr, "Writing corpus to", outputFile
        ETUtils.write(corpus.rootElement, outputFile)
    return corpus.tree

def writeXML(self, examples, predictions, corpus, outputFile, classSet=None, parse=None, tokenization=None, goldCorpus=None, exampleStyle=None, structureAnalyzer=None):
    #print >> sys.stderr, "Writing output to Interaction XML"
    corpus = self.loadCorpus(corpus, parse, tokenization)
    if goldCorpus != None:
        goldCorpus = self.loadCorpus(goldCorpus, parse, tokenization) # load the gold standard corpus separately
    examples, predictions = self.loadExamples(examples, predictions)

    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = None
    if classSet != None:
        classIds = classSet.getIds()

    #counter = ProgressCounter(len(corpus.sentences), "Write Examples")

    exampleQueue = [] # One sentence's examples
    predictionsByExample = {}
    currentMajorId = None
    prevMajorIds = set()
    processedSentenceIds = set()
    xType = None

    # Count the examples to initialize the progress counter; if there are no
    # examples, there must not be any predictions either
    count = 0
    for example in examples:
        count += 1
    #assert count > 0
    if count > 0:
        progress = ProgressCounter(count, "Write Examples")
    else:
        predCount = 0
        for prediction in predictions:
            predCount += 1
        assert predCount == 0

    # Group examples by sentence (major id) and write each sentence's queue at once
    for example, prediction in itertools.izip_longest(examples, predictions):
        assert example != None
        assert prediction != None
        majorId, minorId = example[0].rsplit(".x", 1)
        #if currentMajorId == "GENIA.d114.s9": print "Start"
        if majorId != currentMajorId: # new sentence
            if currentMajorId != None:
                #if currentMajorId == "GENIA.d114.s9": print "JAA"
                processedSentenceIds.add(currentMajorId)
                sentenceObject = corpus.sentencesById[currentMajorId]
                goldSentence = None
                if goldCorpus != None:
                    goldSentence = goldCorpus.sentencesById[currentMajorId]
                self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence, exampleStyle=exampleStyle, structureAnalyzer=structureAnalyzer) # process queue
                progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
            exampleQueue = []
            predictionsByExample = {}
            prevMajorIds.add(currentMajorId)
            assert majorId not in prevMajorIds, majorId
            currentMajorId = majorId
        exampleQueue.append(example) # queue example
        predictionsByExample[example[0]] = prediction
        assert example[3]["xtype"] == self.xType, str(example[3]["xtype"]) + "/" + str(self.xType)

    # Process what is still in queue
    if currentMajorId != None:
        processedSentenceIds.add(currentMajorId)
        sentenceObject = corpus.sentencesById[currentMajorId]
        goldSentence = None
        if goldCorpus != None:
            goldSentence = goldCorpus.sentencesById[currentMajorId]
        self.writeXMLSentence(exampleQueue, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=goldSentence, exampleStyle=exampleStyle, structureAnalyzer=structureAnalyzer) # process queue
        progress.update(len(exampleQueue), "Writing examples ("+exampleQueue[-1][0]+"): ")
        exampleQueue = []
        predictionsByExample = {}

    # Process sentences with no examples (e.g. to clear interactions)
    for sentenceId in sorted(corpus.sentencesById.keys()):
        if sentenceId not in processedSentenceIds:
            sentenceObject = corpus.sentencesById[sentenceId]
            goldSentence = None
            if goldCorpus != None:
                goldSentence = goldCorpus.sentencesById[sentenceId]
            self.writeXMLSentence([], {}, sentenceObject, classSet, classIds, goldSentence=goldSentence, exampleStyle=exampleStyle, structureAnalyzer=structureAnalyzer)

    # Print statistics
    if len(self.counts) > 0:
        print >> sys.stderr, self.counts
        self.counts = defaultdict(int)

    # Write corpus
    if outputFile != None:
        print >> sys.stderr, "Writing corpus to", outputFile
        ETUtils.write(corpus.rootElement, outputFile)
    return corpus.tree
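
# --------------------------------------------------------------------------
# Hedged usage sketch (illustrative only, not part of the original module).
# Assuming "writer" is an instance of an example-writer subclass that defines
# writeXMLSentence and self.xType, the general writeXML above could be driven
# roughly like this; the file names and the parse name are hypothetical
# placeholders.
#
#     xmlTree = writer.writeXML("edge.examples.gz", "edge.predictions.gz",
#                               corpus="devel-events.xml",
#                               outputFile="devel-events-pred.xml",
#                               classSet="edge.class_names",
#                               parse="McCC", tokenization=None,
#                               goldCorpus="devel-events-gold.xml")
#
# Examples are grouped by their sentence id (the part of example[0] before
# ".x"), so the example file must list each sentence's examples contiguously.
# --------------------------------------------------------------------------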