def parse_psection(self, psection, parts, source):
    """Store one ``psection`` element (and its nested psections) as Law rows.

    The element's ``enum`` text is pushed onto ``parts`` for the duration
    of the call, so nested psections see the enumerators of all their
    ancestors, and is popped again before returning.

    Args:
        psection: XML element exposing ``xpath()``, ``attrib`` and child
            iteration (presumably an lxml element -- confirm with caller).
            Its ``id`` and ``lev`` attributes are read.
        parts: list of enumerator strings for the enclosing psections.
            Mutated in place (append on entry, pop on exit) and passed to
            ``Law.set_name``.
        source: value stored on ``law.source`` for every row saved here.

    Side effects:
        Increments ``self.ordering`` once per ``text``/``head`` child,
        saves Law rows, creates placeholder Law rows (``order=0``) for
        referenced laws that do not exist yet, and assigns the collected
        references to the first Law row matching this psection.
    """
    parts.append(psection.xpath('string(enum)'))
    psection_id = psection.attrib['id']

    # Get references
    ref_laws = []
    for ref in psection.xpath('text/aref'):
        for subref in ref.xpath('subref'):
            ref_type = subref.attrib['type']
            if ref_type == 'title':
                # "[^_]+" rather than "[^_]": the title number may have
                # more than one digit (e.g. "usc_sup_01_26"); capturing a
                # single character truncated it to "2".
                match = re.match(
                    r"usc_sup_01_([^_]+)", subref.attrib['target'])
                if not match:
                    continue
                title = match.group(1).lstrip('0')
                section = ""
                ref_psec_id = ""
            elif ref_type in ['sec', 'psec']:
                match = re.match(
                    r"usc_sec_(?P<title>\d+)_(?P<section>[^-]+)-*(?P<section2>[0-9A-Za-z]*)-?(?:\#(?P<psection>\w+))?",
                    subref.attrib['target'])
                if not match:
                    continue
                # Strip the zero padding around the section number: leading
                # zeros of the first part, trailing zeros of the second.
                title = match.group('title').lstrip('0')
                section = (match.group('section').lstrip('0')
                           + match.group('section2').rstrip('0'))
                # The "#psection" fragment is optional in the target.
                ref_psec_id = match.group('psection') or ""
            else:
                continue

            matches = Law.objects.filter(
                title=title, section=section, psection=ref_psec_id)
            if matches:
                ref_law = matches[0]
            else:
                # Referenced law not stored yet: create a placeholder row
                # (no source set) so the reference can be recorded now.
                ref_law = Law.objects.create(
                    title=title, section=section, psection=ref_psec_id,
                    order=0)
            ref_laws.append(ref_law)

    for sub_element in psection:
        if sub_element.tag in ["text", "head"]:
            self.ordering += 1
            matches = Law.objects.filter(
                title=self.title, section=self.section,
                psection=psection_id)
            # Reuse a row only when it is the single match and has no
            # source yet; otherwise start a new Law instance.
            if len(matches) == 1 and not matches[0].source:
                law = matches[0]
            else:
                law = Law(
                    title=self.title, section=self.section,
                    psection=psection_id)
            law.level = int(psection.attrib['lev'])
            # NOTE: ``unicode`` is the Python 2 builtin; this module
            # targets Python 2.
            law.text = unicode(sub_element.xpath('string()') or "")
            law.order = self.ordering
            law.source = source
            law.set_name(parts)
            law.save()
        elif sub_element.tag == "psection":
            self.parse_psection(sub_element, parts, source)

    if ref_laws:
        # References come from ``text/aref``, so a ``text`` child was
        # handled above and at least one matching row should exist.
        first = Law.objects.filter(
            title=self.title, section=self.section,
            psection=psection_id)[0]
        # NOTE(review): direct assignment to what is presumably a
        # many-to-many relation; newer Django versions require
        # ``first.references.set(...)`` -- verify against the Django
        # version in use.
        first.references = ref_laws

    parts.pop()