def test_cet():
    """cElementTree"""
    _table = cet.Element('table')
    for row in table:
        tr = cet.SubElement(_table, 'tr')
        for c in row.values():
            cet.SubElement(tr, 'td').text = str(c)
    cet.tostring(_table)
def test_kid_et():
    """Kid template + cElementTree"""
    _table = cet.Element("table")
    for row in table:
        td = cet.SubElement(_table, "tr")
        for c in row.values():
            cet.SubElement(td, "td").text = str(c)
    kid_tmpl2.table = _table
    kid_tmpl2.serialize(output="html")
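Both benchmark snippets above rely on names defined elsewhere in the benchmark script. A minimal sketch of the assumed setup (the fixture data and template filename are invented here for illustration):

# Hypothetical setup for the two benchmark functions above; the real fixture
# data and Kid template are not part of this collection.
import xml.etree.cElementTree as cet  # the 'cet' alias used above
import kid                            # only needed for the Kid benchmark

# 'table' is assumed to be an iterable of row dictionaries.
table = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]

# 'kid_tmpl2' is assumed to be a compiled Kid template exposing a 'table' slot.
kid_tmpl2 = kid.Template(file='table.kid')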
def addTokensToTree(tokens, element):
    for t in tokens:
        newToken = ElementTree.SubElement(element, "token")
        newToken.attrib["id"] = t.id
        newToken.attrib["text"] = t.text
        newToken.attrib["POS"] = t.pos
        newToken.attrib["charOffset"] = t.charOffset
def _node2xmlfields(self, noderecord):
    ''' fields in a node are written to xml fields; output is sorted according to grammar '''
    if 'BOTSID' not in noderecord:
        raise botslib.OutMessageError(_(u'No field "BOTSID" in xml-output in: "$record"'), record=noderecord)
    #first generate the xml-'record'
    attributedict = {}
    recordtag = noderecord['BOTSID']
    attributemarker = recordtag + self.ta_info['attributemarker']
    for key, value in noderecord.items():    #find the attributes for the xml-record, put these in attributedict
        if key.startswith(attributemarker):
            attributedict[key[len(attributemarker):]] = value
    xmlrecord = ET.Element(recordtag, attributedict)    #make the xml ET node
    if 'BOTSCONTENT' in noderecord:
        xmlrecord.text = noderecord['BOTSCONTENT']
        del noderecord['BOTSCONTENT']
    for key in attributedict.keys():    #remove used fields
        del noderecord[attributemarker + key]
    del noderecord['BOTSID']    #remove 'record' tag
    #generate xml-'fields' in xml-'record'; not sorted
    noderecordcopy = noderecord.copy()
    for key, value in noderecordcopy.items():
        if key not in noderecord or self.ta_info['attributemarker'] in key:    #if field not in outmessage: skip
            continue
        attributedict = {}
        attributemarker = key + self.ta_info['attributemarker']
        for key2, value2 in noderecord.items():
            if key2.startswith(attributemarker):
                attributedict[key2[len(attributemarker):]] = value2
        ET.SubElement(xmlrecord, key, attributedict).text = value    #add xml element to xml record
        for key2 in attributedict.keys():    #remove used fields
            del noderecord[attributemarker + key2]
        del noderecord[key]    #remove xml entity tag
    return xmlrecord
def insertElements(corpus, specAnn):
    for document in corpus.iter('document'):
        docId = document.get("origId")
        assert docId in specAnn, docId
        for sentence in document.iter('sentence'):
            sentOffset = Range.charOffsetToSingleTuple(sentence.get("charOffset"))
            # Use explicit None checks: an existing but empty element is falsy,
            # so a truthiness test would wrongly create a duplicate container.
            analyses = sentence.find("analyses")
            if analyses is None:
                analyses = ET.SubElement(sentence, "analyses")
            #entitiesElement = sentence.find("entities")
            # Find the container
            container = analyses.find("entities") #None
            # for entitiesElement in entitiesElements:
            #     if entitiesElement.get("source") == "SPECIES":
            #         container = entitiesElement
            #         break
            if container is None:
                container = ET.SubElement(analyses, "entities")
                #container.set("source", "SPECIES")
            # Map the spans
            for span in specAnn[docId][:]:
                offset = span.get("offset")
                if Range.overlap(offset, sentOffset):
                    if sentOffset[0] > offset[0] or sentOffset[1] < offset[1]:
                        continue
                    specAnn[docId].remove(span)
                    charOffset = (offset[0] - sentOffset[0], offset[1] - sentOffset[0])
                    matchingText = sentence.get("text")[charOffset[0]:charOffset[1]]
                    spanText = span.get("text")
                    #print matchingText, spanText
                    assert matchingText == spanText, (matchingText, spanText, charOffset)
                    span.set("charOffset", "-".join([str(x) for x in charOffset]))
                    assert "--" not in span.get("charOffset"), [str(x) for x in charOffset]
                    del span.attrib["offset"]
                    #span.set("offset", "")
                    container.append(span)
def addTokenization(tokenization, sentence, sentenceId):
    toks = sentence.find("sentenceanalyses/tokenizations")
    assert toks, "Missing <tokenizations> in sentence %s" % sentenceId
    # assume new-style XML format if there's at least one <tokenization> with
    # a "tokenizer" attribute. Also check duplicates.
    isNew = False
    for t in toks.getiterator("tokenization"):
        if t.get("tokenizer") is not None:
            assert t.get("tokenizer") != tokenization, "Split tokenization '%s' already exists in sentence %s!" % (tokenization, sentenceId)
            isNew = True
    if isNew:
        newTok = ElementTree.SubElement(toks, "tokenization")
        newTok.attrib["tokenizer"] = tokenization
    else:
        assert toks.find(tokenization) is None, "Split tokenization '%s' already exists in sentence %s!" % (tokenization, sentenceId)
        newTok = ElementTree.SubElement(toks, tokenization)
    return newTok
def addParse(parse, tokenization, sentence, sentenceId):
    # check whether the XML is new-style or old-style.
    parses = sentence.find("sentenceanalyses/parses")
    assert parses, "Missing <parses> in sentence %s" % sentenceId
    # assume new-style if we have at least one <parse> with a "parser"
    # attribute. Also check that a parse under the given name isn't there
    # already.
    isNew = False
    for p in parses.getiterator("parse"):
        if p.get("parser") is not None:
            assert p.get("parser") != parse, "New parse '%s' already exists in sentence %s!" % (parse, sentenceId)
            isNew = True
    if isNew:
        newParse = ElementTree.SubElement(parses, "parse")
        newParse.attrib["parser"] = parse
        newParse.attrib["tokenizer"] = tokenization
    else:
        # check for overlap
        assert parses.find(parse) is None, "New parse '%s' already exists in sentence %s!" % (parse, sentenceId)
        newParse = ElementTree.SubElement(parses, parse)
    return newParse
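addTokenization and addParse distinguish an older layout, in which the analysis name is the element tag itself, from a newer one in which a generic <parse>/<tokenization> element carries the name as an attribute. A hedged sketch of the two layouts implied by these checks (analysis names invented for illustration):

# Hypothetical illustration of the two <sentenceanalyses> layouts handled above.
# Old style: the parse/tokenization name is used as the element tag.
OLD_STYLE = """
<sentenceanalyses>
  <parses><McCC/></parses>
  <tokenizations><McCC/></tokenizations>
</sentenceanalyses>
"""
# New style: generic elements identified by "parser"/"tokenizer" attributes.
NEW_STYLE = """
<sentenceanalyses>
  <parses><parse parser="McCC" tokenizer="McCC"/></parses>
  <tokenizations><tokenization tokenizer="McCC"/></tokenizations>
</sentenceanalyses>
"""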
def makePath(element, tagList):
    #tagList is a list of tag names
    #a list of corresponding elements is returned
    #if these did not exist, they are created!
    result = []
    currElem = element
    for tag in tagList:
        for subElem in currElem:
            if subElem.tag == tag:
                break
        else:
            subElem = ElementTree.SubElement(currElem, tag)
        result.append(subElem)
        currElem = subElem
    return result
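makePath builds or reuses a nested element path one level at a time; the for/else idiom creates a missing child only when the inner loop finds no existing element with the wanted tag. A small usage sketch (tag names invented for illustration):

# Hypothetical usage of makePath; the tag names are made up for illustration.
from xml.etree import ElementTree

root = ElementTree.Element("corpus")
# The first call creates <document>/<sentence>/<analyses> under root.
document, sentence, analyses = makePath(root, ["document", "sentence", "analyses"])
# A second call with the same tags returns the existing elements instead of
# creating duplicates.
assert makePath(root, ["document", "sentence", "analyses"]) == [document, sentence, analyses]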
def updateXML(root, removeAnalyses=True):
    counts = defaultdict(int)
    for document in root.findall("document"):
        sentencePos = 0
        counts["documents"] += 1
        for sentence in document.findall("sentence"):
            counts["sentences"] += 1
            # Remove the original parses
            analyses = sentence.find("sentenceanalyses")
            if analyses != None:
                counts["analyses"] += 1
                if removeAnalyses:
                    counts["removed-analyses"] += 1
                    sentence.remove(analyses)
            # Add an artificial sentence offset so that sentences can be exported as a single document
            sentenceText = sentence.get("text")
            sentence.set("charOffset", Range.tuplesToCharOffset((sentencePos, sentencePos + len(sentenceText))))
            # Update the character offsets of all entities from the old format (begin,end) to the new one (begin,end+1)
            for entity in sentence.findall("entity"):
                counts["entities"] += 1
                offsets = [(x[0], x[1] + 1) for x in Range.charOffsetToTuples(entity.get("charOffset"))]
                entityText = entity.get("text")
                for offset, entitySpan in zip(offsets, [sentenceText[x[0]:x[1]] for x in offsets]):
                    counts["entity-offsets"] += 1
                    lenOffset = offset[1] - offset[0]
                    offsetText, entityText = entityText[:lenOffset].strip(), entityText[lenOffset:].strip()
                    assert offsetText == entitySpan, (offsets, (entity.get("text"), entitySpan), (offsetText, entityText), sentenceText)
                entity.set("charOffset", Range.tuplesToCharOffset(offsets))
            # Convert positive pairs into interaction elements
            numInteractions = 0
            for pair in sentence.findall("pair"):
                counts["pairs"] += 1
                sentence.remove(pair)
                if pair.get("interaction") == "True":
                    del pair.attrib["interaction"]
                    pair.set("id", pair.get("id").rsplit(".", 1)[0] + ".i" + str(numInteractions))
                    pair.set("type", "PPI")
                    ET.SubElement(sentence, "interaction", pair.attrib)
                    numInteractions += 1
                    counts["interactions"] += 1
            sentencePos += len(sentenceText) + 1
    print >> sys.stderr, "Updated Interaction XML format:", dict(counts)
    return root
def _node2xmlfields(self, noderecord):
    ''' fields in a node are written to xml fields; output is sorted according to grammar '''
    #first generate the xml-'record'
    #~ print 'record',noderecord['BOTSID']
    attributedict = {}
    recordtag = noderecord['BOTSID']
    #attributemarker is a marker in the fieldname used to find out if field is an attribute of either xml-'record' or xml-element
    attributemarker = recordtag + self.ta_info['attributemarker']
    #~ print '  rec_att_mark',attributemarker
    for key, value in noderecord.items():    #find attributes belonging to xml-'record' and store in attributedict
        if key.startswith(attributemarker):
            #~ print '    record attribute',key,value
            attributedict[key[len(attributemarker):]] = value
    xmlrecord = ET.Element(recordtag, attributedict)    #make the xml ET node
    if 'BOTSCONTENT' in noderecord:    #BOTSCONTENT is used to store the value/text of the xml-record itself.
        xmlrecord.text = noderecord['BOTSCONTENT']
        del noderecord['BOTSCONTENT']
    for key in attributedict.keys():    #remove used fields
        del noderecord[attributemarker + key]
    del noderecord['BOTSID']    #remove 'record' tag
    #generate xml-'fields' in xml-'record'; sort these by looping over records definition
    for field_def in self.defmessage.recorddefs[recordtag]:    #loop over fields in 'record'
        if field_def[ID] not in noderecord:    #if field not in outmessage: skip
            continue
        #~ print '  field',field_def
        attributedict = {}
        attributemarker = field_def[ID] + self.ta_info['attributemarker']
        #~ print '  field_att_mark',attributemarker
        for key, value in noderecord.items():
            if key.startswith(attributemarker):
                #~ print '    field attribute',key,value
                attributedict[key[len(attributemarker):]] = value
        ET.SubElement(xmlrecord, field_def[ID], attributedict).text = noderecord[field_def[ID]]    #add xml element to xml record
        for key in attributedict.keys():    #remove used fields
            del noderecord[attributemarker + key]
        del noderecord[field_def[ID]]    #remove xml entity tag
    return xmlrecord
def convertChemProt(inDirs=None, setNames=None, outPath=None, goldTestSet=True, downloadDir=None, extractDir=None, redownload=False, debug=False):
    tempDir = None
    if inDirs == None:
        print >> sys.stderr, "---------------", "Downloading ChemProt files", "---------------"
        if extractDir == None:
            tempDir = tempfile.mkdtemp()
        inDirs = []
        for setName in ("TRAIN", "DEVEL", "TEST"):
            if goldTestSet and setName == "TEST":
                setName = "TEST_GOLD"
            if Settings.URL["CP17_" + setName] != None:
                currentExtractDir = extractDir if extractDir else tempDir
                currentExtractDir = os.path.join(currentExtractDir, setName.lower())
                inDirs.append(downloadFile(Settings.URL["CP17_" + setName], downloadDir, currentExtractDir, redownload))
    print >> sys.stderr, "Reading ChemProt corpus from input", inDirs, "using dataset mapping", setNames
    dataSets = OrderedDict()
    for inDir in inDirs:
        print >> sys.stderr, "Reading input directory", inDir
        filenames = os.listdir(inDir)
        filetypes = ["_abstracts", "_entities", "_relations"]
        # Collect the file paths for the data types
        dirDataSets = set()
        for filename in filenames:
            if not (filename.endswith(".tsv") and any([x in filename for x in filetypes])):
                continue
            dataSetId, dataType = filename.replace("_gs", "").rsplit("_", 1)
            if setNames != None:
                dataSetId = setNames.get(dataSetId, dataSetId)
            dirDataSets.add(dataSetId)
            dataType = dataType.split(".")[0]
            if dataSetId not in dataSets:
                dataSets[dataSetId] = {}
            assert dataType not in dataSets[dataSetId]
            dataSets[dataSetId][dataType] = os.path.join(inDir, filename)
        print >> sys.stderr, "Found ChemProt datasets", list(dirDataSets), "at", inDir
    print >> sys.stderr, "Read datasets:", dataSets.keys()
    # Build the Interaction XML
    print >> sys.stderr, "Converting to Interaction XML"
    corpusName = "CP17"
    corpus = ET.Element("corpus", {"source": corpusName})
    counts = defaultdict(int)
    docById = {}
    entityById = {}
    entitiesByDoc = {}
    docsWithErrors = set()
    for dataSetId in sorted(dataSets.keys()):
        prevCounts = copy.copy(counts)
        print >> sys.stderr, "---", "Building elements for dataset", dataSetId, "---"
        dataSet = dataSets[dataSetId]
        counts["sets"] += 1
        with open(dataSet["abstracts"], "rt") as f:
            print >> sys.stderr, "Adding document elements for dataset", dataSetId
            for row in UnicodeDictReader(f, delimiter="\t", fieldnames=["id", "title", "abstract"], quoting=csv.QUOTE_NONE):
                document = ET.Element("document", {"id": corpusName + ".d" + str(counts["documents"]), "origId": row["id"], "set": dataSetId})
                document.set("text", row["title"] + " " + row["abstract"])
                document.set("titleOffset", Range.tuplesToCharOffset((0, len(row["title"]))))
                if document.get("origId") in docById:
                    assert document.get("text") == docById[document.get("origId")].get("text")
                    assert document.get("titleOffset") == docById[document.get("origId")].get("titleOffset")
                    counts["duplicate-documents"] += 1
                else:
                    corpus.append(document)
                    docById[document.get("origId")] = document
                    counts["documents"] += 1
        with open(dataSet["entities"], "rt") as f:
            print >> sys.stderr, "Adding entity elements for dataset", dataSetId
            for row in UnicodeDictReader(f, delimiter="\t", fieldnames=["docId", "id", "type", "begin", "end", "text"], quoting=csv.QUOTE_NONE):
                document = docById[row["docId"]]
                assert row["type"] in ("CHEMICAL", "GENE-Y", "GENE-N")
                # Check for duplicate entities
                if row["docId"] not in entitiesByDoc:
                    entitiesByDoc[row["docId"]] = set()
                assert row["id"] not in entitiesByDoc[row["docId"]]
                entitiesByDoc[row["docId"]].add(row["id"])
                # Determine the offset
                offset = (int(row["begin"]), int(row["end"]))
                docSpan = document.get("text")[offset[0]:offset[1]]
                if docSpan == row["text"]:
                    entity = ET.SubElement(document, "entity", {"id": document.get("id") + ".e" + str(len([x for x in document.findall("entity")]))})
                    entity.set("given", "True")
                    entity.set("origId", row["id"])
                    entity.set("type", row["type"].split("-")[0])
                    entity.set("normalized", "True" if row["type"].endswith("-Y") else "False")
                    entity.set("charOffset", Range.tuplesToCharOffset((offset[0], offset[1])))
                    entity.set("text", row["text"])
                    if row["docId"] not in entityById:
                        entityById[row["docId"]] = {}
                    assert entity.get("origId") not in entityById[row["docId"]]
                    entityById[row["docId"]][entity.get("origId")] = entity
                    counts["entities"] += 1
                else:
                    print >> sys.stderr, "Alignment error in document", row["docId"], (offset, docSpan, row)
                    counts["entities-error"] += 1
                    docsWithErrors.add(row["docId"])
        if "relations" in dataSet:
            print >> sys.stderr, "Adding relation elements for dataset", dataSetId
            with open(dataSet["relations"], "rt") as f:
                for row in UnicodeDictReader(f, delimiter="\t", fieldnames=["docId", "group", "groupEval", "type", "arg1", "arg2"], quoting=csv.QUOTE_NONE):
                    for argId in ("1", "2"):
                        assert row["arg" + argId].startswith("Arg" + argId + ":")
                        row["arg" + argId] = row["arg" + argId][5:]
                    document = docById[row["docId"]]
                    e1 = entityById[row["docId"]].get(row["arg1"])
                    e2 = entityById[row["docId"]].get(row["arg2"])
                    if e1 != None and e2 != None:
                        interaction = ET.SubElement(document, "interaction", {"id": document.get("id") + ".i" + str(len([x for x in document.findall("interaction")]))})
                        interaction.set("directed", "True")
                        interaction.set("type", row["group"])
                        interaction.set("relType", row["type"])
                        row["groupEval"] = row["groupEval"].strip()
                        assert row["groupEval"] in ("Y", "N")
                        interaction.set("evaluated", "True" if row["groupEval"] == "Y" else "False")
                        interaction.set("e1", e1.get("id"))
                        interaction.set("e2", e2.get("id"))
                        counts["interactions"] += 1
                    else:
                        counts["interaction-error"] += 1
                        docsWithErrors.add(row["docId"])
        else:
            print >> sys.stderr, "No relations for dataset", dataSetId
        print >> sys.stderr, "dataset", dataSetId, {x: counts[x] - prevCounts.get(x, 0) for x in counts if counts[x] - prevCounts.get(x, 0) > 0}
    if len(docsWithErrors) > 0:
        counts["documents-with-errors"] = len(docsWithErrors)
    print >> sys.stderr, "---", "All Datasets Done", "---"
    print >> sys.stderr, "ChemProt conversion:", dict(counts)
    if tempDir != None and not debug:
        print >> sys.stderr, "Removing temporary directory", tempDir
        shutil.rmtree(tempDir)
    if outPath != None:
        ETUtils.write(corpus, outPath)
    return ET.ElementTree(corpus)
def eventsToNewXML(events):
    xml = ET.Element("corpus")
    xml.set("source", "Static Relations")
    docCount = 0
    sentenceById = {}
    for sentenceId in sorted(events.keys()):
        entities = []
        interactions = []
        entityByOffset = {}
        for event in events[sentenceId]:
            #print event
            if sentenceId not in sentenceById:
                document = ET.SubElement(xml, "document")
                document.set("id", "SR.d" + str(docCount))
                document.set("origId", sentenceId)
                document.set("set", event["dataSet"])
                sentence = ET.SubElement(document, "sentence")
                sentence.set("id", "SR.d" + str(docCount) + ".s" + str(docCount))
                sentence.set("origId", sentenceId)
                sentence.set("text", event["text"])
                sentence.set("charOffset", "0-" + str(len(event["text"]) - 1))
                docCount += 1
                sentenceById[sentenceId] = sentence
            else:
                sentence = sentenceById[sentenceId]
                assert sentence.get("text") == event["text"], (sentence.get("text"), event["text"])
            # Add entities
            e1Offset = event["entity"].split("\t")[0]
            e2Offset = event["namedEntity"].split("\t")[0]
            if e1Offset not in entityByOffset:
                e1 = ET.Element("entity")
                e1.set("text", event["entity"].split("\t")[1].strip())
                e1.set("id", sentence.get("id") + ".e" + str(len(entities)))
                offset = getOffset(event["entity"].split("\t")[0])
                assert sentence.get("text")[offset[0]:offset[1] + 1] == e1.get("text"), (event, sentence.get("text"), e1.get("text"))
                e1.set("charOffset", str(offset[0]) + "-" + str(offset[1]))
                e1.set("isName", "False")
                e1.set("type", "Entity")
                entities.append(e1)
                entityByOffset[e1Offset] = e1
            else:
                e1 = entityByOffset[e1Offset]
            if e2Offset not in entityByOffset:
                e2 = ET.Element("entity")
                e2.set("text", event["namedEntity"].split("\t")[1].strip())
                e2.set("id", sentence.get("id") + ".e" + str(len(entities)))
                offset = getOffset(event["namedEntity"].split("\t")[0])
                assert sentence.get("text")[offset[0]:offset[1] + 1] == e2.get("text"), (event, sentence.get("text"), e2.get("text"))
                e2.set("charOffset", str(offset[0]) + "-" + str(offset[1]))
                e2.set("isName", "True")
                e2.set("type", "Protein")
                entities.append(e2)
                entityByOffset[e2Offset] = e2
            else:
                e2 = entityByOffset[e2Offset]
            # Add interactions
            interaction = ET.Element("interaction")
            interaction.set("id", sentence.get("id") + ".i" + str(len(interactions)))
            interaction.set("origId", event["id"])
            interaction.set("type", event["eventType"])
            interaction.set("e1", e1.get("id"))
            interaction.set("e2", e2.get("id"))
            interactions.append(interaction)
        for entity in entities:
            sentence.append(entity)
        for interaction in interactions:
            sentence.append(interaction)
    return xml
def string_variables_dict_to_xml_element(string_variables_dict):
    '''
    Convert string variables dictionary to xml element
    @author: Canh Duong <*****@*****.**>

    :param string_variables_dict:
        string_variables_dict = {
            'string0': {
                'name': 'string0',
                'original_text': 'Calculate',
                'default': 'Calculate',
                'value': 'Calculate',
                'context': 'context1',
                'context_list': {
                    'context0': {
                        'name': "Context 1 of Calculate",
                        'help': "Synonym set 1",
                        'synonyms': ['Calculate', 'Compute'],
                        'select': 'true',
                    },
                    'context1': {
                        'name': "Context 2 of Calculate",
                        'help': "Synonym set 2",
                        'synonyms': ['Find', 'Figure out', 'Estimate'],
                        'select': 'false',
                    }
                }
            },
            'string1': {
                'name': 'string1',
                'original_text': 'apple',
                'default': 'apple',
                'value': 'apple',
                'context': 'context0',
                'context_list': {
                    'context0': {
                        'name': "Context 1 - Fruits",
                        'help': "Synonym set 1",
                        'synonyms': ['apple', 'mango'],
                        'select': 'true',
                    },
                    'context1': {
                        'name': "Context 2 - Computer",
                        'help': "Synonym set 2",
                        'synonyms': ['Apple', 'IBM', 'Google', 'GCS'],
                        'select': 'false',
                    }
                }
            }
        }

    :return: string_variables_elem - xml element
        <string_variables>
            <string_variable default="car" name="string0" original_text="car" value="car">
                <context_list>
                    <context name="Synonyms of text 'car' (Default)" select="true">
                        <option>car</option>
                        <option>machine</option>
                        <option>truck</option>
                        <option>auto</option>
                        <option>automobile</option>
                        <option>electric car</option>
                    </context>
                </context_list>
            </string_variable>
            <string_variable default="house" name="string1" original_text="house" value="house">
                <context_list>
                    <context name="Synonyms of text 'house' (Default)" select="true">
                        <option>house</option>
                        <option>palace</option>
                        <option>building</option>
                        <option>land</option>
                        <option>island</option>
                    </context>
                </context_list>
            </string_variable>
        </string_variables>
    '''
    # xml elements
    string_variables_elem = ET.Element('string_variables')
    # Append child
    for var_name, string_var in string_variables_dict.iteritems():
        # create sub-element
        string_variable_elem = ET.SubElement(string_variables_elem, 'string_variable')
        # Set attributes here
        string_variable_elem.set('name', var_name)
        string_variable_elem.set('original_text', string_var['original_text'])
        string_variable_elem.set('value', string_var['value'])
        string_variable_elem.set('default', string_var['default'])
        # create sub-element
        context_list_elem = ET.SubElement(string_variable_elem, 'context_list')
        # Append child
        for context_id, context in string_var['context_list'].iteritems():
            # create sub-element
            context_elem = ET.SubElement(context_list_elem, 'context')
            # Set attributes here
            context_elem.set('name', context['name'])
            # context_elem.text(context['help'])
            if context_id == string_var['context']:
                context_elem.set('select', 'true')
            else:
                context_elem.set('select', 'false')
            # Append child
            for word in context['synonyms']:
                # create sub-element
                option = ET.SubElement(context_elem, 'option')
                # Set text value here
                option.text = word
    return string_variables_elem
def convert_data_from_dict_to_xml(data):
    '''
    Convert data fields from dictionary to XML when performing a studio submission on the Basic Template tab.
    The output is used to update the value of the Advanced Editor.
        1. problem description
        2. Image url
        3. variables (name, min_value, max_value, type, decimal_places)
        4. answer_template_string

    :param data: a dictionary of fields supported for raw editor
    :return:
        <problem>
            <description>Given a = [a] and b = [b]. Calculate the [sum], [difference] of a and b. </description>
            <images>
                <image_url link="http://example.com/image1">Image 1</image_url>
                <image_url link="http://example.com/image2">Image 2</image_url>
            </images>
            <variables>
                <variable name="a" min="1" max="200" type="integer"/>
                <variable name="b" min="1.0" max="20.5" type="float" decimal_places="2"/>
            </variables>
            <answer_templates>
                <answer sum = "[a] + [b]" difference = "[a] - [b]">Teacher's answer</answer>
            </answer_templates>
        </problem>
    '''
    print("## CALLING FUNCTION convert_data_from_dict_to_xml() ##")
    # print("Input data type: {}".format(type(data)))
    print("Input data: {}".format(data))

    xml_string = ''
    # init the root element: problem
    problem = ET.Element('problem')

    # convert question template
    field_question_template = data['question_template']
    description = ET.SubElement(problem, 'description')
    description.text = field_question_template

    # Convert answer template string to dictionary,
    # then build xml string for raw edit fields
    field_answer_template = data['answer_template']
    # Check for empty input
    if not field_answer_template:
        raise JsonHandlerError(400, "Answer template must not be empty")

    # Handle answer template
    # Parse and convert answer template string to dictionary first
    answer_template_dict = {}
    answer_template_list = field_answer_template.split('\n')
    for answer in answer_template_list:
        # only process if not empty, ignore empty answer template
        if answer:
            # answer template must contain '=' character
            if (answer.find('=') != -1):  # found '=' at lowest index of string
                answer_attrib_list = answer.split('=')
                # print("answer_attrib_list = {}".format(answer_attrib_list))
                answer_attrib_key = answer_attrib_list[0]
                answer_attrib_value = answer_attrib_list[1]
                # print("answer_attrib_key = {}".format(answer_attrib_key))
                # print("answer_attrib_value = {}".format(answer_attrib_value))
                # Remove unexpected white spaces
                answer_attrib_key = answer_attrib_key.lstrip()      # all leading whitespaces are removed from the string.
                answer_attrib_key = answer_attrib_key.rstrip()      # all ending whitespaces are removed from the string.
                answer_attrib_value = answer_attrib_value.lstrip()  # all leading whitespaces are removed from the string.
                answer_attrib_value = answer_attrib_value.rstrip()  # all ending whitespaces are removed from the string.
                # print("REMOVED SPACES, answer_attrib_key = {}".format(answer_attrib_key))
                # print("REMOVED SPACES, answer_attrib_value = {}".format(answer_attrib_value))
                # Add answer attribute to dict
                answer_template_dict[answer_attrib_key] = answer_attrib_value
            else:
                raise JsonHandlerError(400, "Unsupported answer format. Answer template must contain '=' character: {}".format(answer))
    # print("Resulted answer_template_dict: {}".format(answer_template_dict))

    # Answer template xml elements
    answer_templates = ET.SubElement(problem, 'answer_templates')
    answer = ET.SubElement(answer_templates, 'answer')
    # Add the converted dict data to xml elements
    for attrib_key, attrib_value in answer_template_dict.iteritems():
        answer.set(attrib_key, attrib_value)
    answer.text = "Teacher's answer"

    # Convert numeric variables
    field_variables = data['variables']
    # xml elements
    variables_elem = ET.SubElement(problem, 'variables')
    for var_name, attributes in field_variables.iteritems():
        var_name = ET.SubElement(variables_elem, 'variable')
        for attribute, value in attributes.iteritems():
            # Set attribute
            var_name.set(attribute, value)

    # Convert string variables dictionary to xml string
    field_string_variables = data['string_variables']
    # xml elements
    string_variables_elem = string_variables_dict_to_xml_element(field_string_variables)
    # Adds the element subelement to the end of this element's internal list of subelements.
    problem.append(string_variables_elem)

    # convert image
    field_image_url = data['image_url']
    # xml elements
    images = ET.SubElement(problem, 'images')
    image_url = ET.SubElement(images, 'image_url')
    # Set attribute
    image_url.set('link', field_image_url)

    # print "before indent, Problem elem dump = ", ET.dump(problem)
    indented_problem = indent(problem)
    # print "after indent, Problem elem dump = ", ET.dump(indented_problem)
    xml_string = ET.tostring(indented_problem)
    # print "Output xml string = ", xml_string
    print("## End FUNCTION convert_data_from_dict_to_xml() ##")
    return xml_string
def process(self, directed, negatives):
    # Build the entities
    for tag in ("e1", "e2"):
        self.entities.append(self._getEntity(self.text, tag))
        self.text = self.text.replace("<" + tag + ">", "").replace("</" + tag + ">", "")
    # Check entity offsets
    for entity in self.entities:
        begin, end = [int(x) for x in entity.get("charOffset").split("-")]
        assert entity.get("text") == self.text[begin:end], (entity.get("text"), self.text, self.text[begin:end], [begin, end])
    assert len(self.entities) == 2
    eMap = {"e1": self.entities[0], "e2": self.entities[1]}
    for key in eMap:  # Check that e1 == e1 and e2 == e2
        assert eMap[key].get("id").endswith("." + key)
    # Build the sentence
    docElem = ET.Element("document", {"id": self.corpusId + ".d" + self.origId, "set": self.setName})
    sentElem = ET.SubElement(docElem, "sentence", {"id": self.id, "charOffset": "0-" + str(len(self.text)), "text": self.text, "origId": self.origId})
    sentElem.set("relation", self.relation)
    if self.comment != None and self.comment != "":
        sentElem.set("comment", self.comment)
    for entity in self.entities:
        sentElem.append(entity)
    # Determine interaction types per direction
    relFrom, relTo = "", ""
    if self.relation == "Other":
        sentElem.append(self._getInteraction(self.relation, "e1", "e2", directed, 0, eMap, relFrom, relTo))
    else:
        relType, rest = self.relation.strip(")").split("(")
        relFrom, relTo = rest.split(",")
        reverse = (relFrom == "e2" and relTo == "e1")
        if not reverse:
            assert relFrom == "e1" and relTo == "e2"
            forwardType = self.mergeType(relType, relFrom, relTo) if (negatives == "REVERSE_POS") else relType
            reverseType = self.mergeType(relType, relTo, relFrom) if (negatives == "REVERSE_POS") else "neg"
        else:
            forwardType = self.mergeType(relType, relFrom, relTo) if (negatives == "REVERSE_POS") else "neg"
            reverseType = self.mergeType(relType, relTo, relFrom) if (negatives == "REVERSE_POS") else relType
        # Build the interactions
        if directed:
            if forwardType != "neg" or negatives == "INCLUDE":
                sentElem.append(self._getInteraction(forwardType, "e1", "e2", directed, 0, eMap, "e1", "e2"))
            if reverseType != "neg" or negatives == "INCLUDE":
                sentElem.append(self._getInteraction(reverseType, "e2", "e1", directed, 1, eMap, "e2", "e1"))
        else:
            sentElem.append(self._getInteraction(self.relation, "e1", "e2", directed, 0, eMap, relFrom, relTo))
    return docElem
# make a mapping from original to split token ids. Note that for
# split tokens, only the last of the split parts will be stored.
tokenIdMap = {}
for t in split:
    tokenIdMap[t.origId] = t.id

# make a copy of the specified parse that refers to the split tokens
# instead of the originals.
newparse = addParse(options.newparse, options.splittokenization, sentence, sId)

depSeqId = 1
for d in parse.getiterator("dependency"):
    t1, t2, dType = d.get("t1"), d.get("t2"), d.get("type")
    assert t1 in tokenIdMap and t2 in tokenIdMap, "INTERNAL ERROR"
    dep = ElementTree.SubElement(newparse, "dependency")
    dep.attrib["t1"] = tokenIdMap[t1]
    dep.attrib["t2"] = tokenIdMap[t2]
    dep.attrib["type"] = dType
    dep.attrib["id"] = "split_%d" % depSeqId
    depSeqId += 1

# Add in new dependencies between the split parts.
for t in [tok for tok in split if tok.splitFrom is not None]:
    dep = ElementTree.SubElement(newparse, "dependency")
    dep.attrib["t1"] = t.id
    dep.attrib["t2"] = t.splitFrom
    dep.attrib["type"] = splitDepName

indent(root)
tree.write(sys.stdout)