def _process_wordclass_set(wordclass_set, id):
    """
    Add cached variant forms to a wordclass set as fragment morphSets.

    Looks up the variant set for this id and wordclass in VARIANTS_CACHE.
    Every cached variant whose sort key is not already among the set's
    existing types, and whose date range does not end before
    MINIMUM_DATE, is appended to the set's morphSet block as a new
    <morphSet fragment="true"> node.

    Arguments:
        wordclass_set: the wordclass set to extend (modified in place).
        id: identifier passed to the cache lookup.

    Returns None. Nothing happens if the cache lookup yields nothing.
    """
    variant_set = VARIANTS_CACHE.find(id=id,
                                      wordclass=wordclass_set.wordclass())
    if not variant_set:
        return

    # Sort keys that are already covered; used to avoid adding duplicates
    known_sorts = {unit.sort for unit in wordclass_set.types()}

    # Pass 1: collect the usable variants, inflection by inflection
    selected = []
    for inflection in INFLECTIONS[wordclass_set.wordclass()]:
        for candidate in variant_set.variants.get(inflection, []):
            if (candidate.sort in known_sorts or
                    candidate.date.end < MINIMUM_DATE):
                continue
            # The variant object itself is tagged with the inflection it
            # was filed under; pass 2 reads this attribute back.
            # NOTE(review): this writes to an object obtained from the
            # cache -- confirm that nothing else relies on its previous
            # wordclass value.
            candidate.wordclass = inflection
            selected.append(candidate)
            known_sorts.add(candidate.sort)

    # Pass 2: turn each selected variant into a morphSet fragment
    for variant in selected:
        fragment = etree.Element('morphSet', fragment='true')

        # Keep the variant's dates within the wordclass set's own dates
        parent_limits = (wordclass_set.date().start,
                         wordclass_set.date().end)
        start, end = variant.date.constrain(parent_limits)
        span = DateRange(start=start, end=end, hardEnd=True)
        fragment.append(span.to_xml(omitProjected=True))

        # Copy over whichever boolean markers are set on the variant
        for flag in ('regional', 'irregular'):
            if getattr(variant, flag):
                fragment.set(flag, 'true')

        type_node = etree.SubElement(fragment, 'type')
        etree.SubElement(type_node, 'form').text = variant.form
        type_node.append(Wordclass(variant.wordclass).to_xml())

        wordclass_set.morphset_block().append(fragment)
def _variant_date_node(variant_date, block_date):
    """
    Build the date-range node for a variant form.

    The variant's dates are first constrained to the parent's limits
    (the parent's start and its projected end), so that they cannot
    fall outside the parent entry.

    Arguments:
        variant_date: the variant's date object; must offer constrain().
        block_date: the parent's date object; its start attribute and
            projected_end() method supply the limits.

    Returns the result of DateRange.to_xml(omitProjected=True) for the
    constrained range.
    """
    # Limits imposed by the parent entry
    parent_limits = (block_date.start, block_date.projected_end())
    clipped_start, clipped_end = variant_date.constrain(parent_limits)

    # Wrap the constrained dates in a fresh range and serialize it
    return DateRange(
        start=clipped_start,
        end=clipped_end,
        hardEnd=True,
    ).to_xml(omitProjected=True)
def construct_entry_node(self, entry):
    """
    Build and return the <e> element for a dictionary entry.

    The element carries the entry's lexid (odoLexid attribute), one or
    two <lemma> children (a second, US-locale lemma is added when the
    entry has a US headword), and one <wordclassSet> child per wordclass
    block. Each wordclassSet holds the wordclass node, an optional date
    node, a <morphSetBlock>, and the definition and resource nodes
    produced by self.definition_node() and self.resource_node().

    The entry is marked encyclopedic="true" when its first wordclass
    block has the wordclass 'NP'. Note that the first block is indexed
    directly, so an entry with no wordclass blocks raises IndexError.
    """
    entry_node = etree.Element('e', odoLexid=entry.lexid)
    if entry.wordclass_blocks[0].wordclass == 'NP':
        entry_node.set('encyclopedic', 'true')

    # Headword(s): the main lemma always comes first; it is only given
    # a 'uk' locale when there is a US headword to contrast it with.
    main_lemma = etree.SubElement(entry_node, 'lemma', src=self.dictname)
    main_lemma.text = entry.headword
    if entry.headword_us:
        main_lemma.set('locale', 'uk')
        us_lemma = etree.SubElement(entry_node, 'lemma',
                                    locale='us', src=self.dictname)
        us_lemma.text = entry.headword_us

    for block in entry.wordclass_blocks:
        # Work out the serialized date node for this block, if any.
        # Without an entry date, 'NP' blocks get no date node and every
        # other wordclass falls back to the default one.
        if entry.date is None:
            date_xml = DEFAULT_DATE_NODE if block.wordclass != 'NP' else None
        else:
            daterange = DateRange(start=entry.date,
                                  end=END_DATE,
                                  estimated=False)
            if block.wordclass == 'NP':
                daterange.is_estimated = True
            date_xml = etree.tostring(daterange.to_xml(omitProjected=True))

        set_node = etree.SubElement(entry_node, 'wordclassSet')
        set_node.append(Wordclass(block.wordclass).to_xml())
        # The date is kept in serialized form and re-parsed at each use,
        # so that every parent receives its own separate node.
        if date_xml:
            set_node.append(etree.fromstring(date_xml))

        # Inflected forms: one morphSet per group, one type per unit
        morph_block = etree.Element('morphSetBlock')
        for group in block.morphgroups:
            morphset = etree.SubElement(morph_block, 'morphSet')
            if date_xml:
                morphset.append(etree.fromstring(date_xml))
            for unit in group.morphunits:
                type_node = etree.SubElement(morphset, 'type')
                etree.SubElement(type_node, 'form').text = unit.form
                type_node.append(Wordclass(unit.wordclass).to_xml())

        for child in (morph_block,
                      self.definition_node(block),
                      self.resource_node(block, entry.lexid)):
            set_node.append(child)

    return entry_node