def next_object(self, element):
    """Return the next DataOU object.

    Walk ``element`` and all of its sub-elements and copy the recognised
    tags into a fresh DataOU:

    - <Stedkode>: parsed with make_sko() and registered as an ID of the
      type given by ``self.tag2type``; an unparsable value is logged.
    - <Overordnetstedkode>: parsed with make_sko() and stored as the parent.
    - <Navn>: every name yielded by ``self._make_names()`` is added.
    - <Adresse>: converted with ``self._make_address()``.
    - <Startdato> / <Sluttdato>: parsed as ``%Y-%m-%d`` dates.

    Usage codes are then read from ``.//Bruksomrade/Type`` and contact
    information from the elements matched by
    ``element.findall("Kommunikasjon")``.

    :param element: XML element describing one OU.
    :return:
        The populated DataOU, or None when the OU has no NAME_LONG name and
        is neither expired nor not yet active.
    """
    result = DataOU()

    # IVR 2007-12-24 FIXME: One of the attributes is special, and tags the
    # OU's intended usage code (bruksområde). Find out which attribute
    # this is.
    # Iterate over *all* subelements
    # NOTE(review): getiterator() is deprecated in xml.etree.ElementTree and
    # was removed in Python 3.9; iter() is its replacement.
    for sub in element.getiterator():
        value = None
        if sub.text:
            value = sub.text.strip().encode("latin1")

        if sub.tag == "Stedkode":
            sko = make_sko(value)
            if sko is not None:
                result.add_id(self.tag2type[sub.tag], sko)
            else:
                # invalid value for the <Stedkode> tag
                if self.logger:
                    self.logger.warn(
                        'Detected XML <Stedkode> '
                        'tag with invalid value: %s', value)
        elif sub.tag == "Overordnetstedkode":
            sko = make_sko(value)
            if sko is not None:
                # Reuse the value parsed above instead of parsing it again.
                result.parent = (result.NO_SKO, sko)
        elif sub.tag == "Navn":
            for name in self._make_names(sub):
                result.add_name(name)
        elif sub.tag in ("Adresse", ):
            result.add_address(self._make_address(sub))
        elif sub.tag in ("Startdato", "Sluttdato"):
            # The raw text (not the stripped/encoded value) is parsed here.
            date = self._make_mxdate(sub.text, format="%Y-%m-%d")
            if sub.tag == "Startdato":
                result.start_date = date
            else:
                result.end_date = date

    # Whether the OU can be published in various online directories
    result.publishable = False
    for tmp in element.findall(".//Bruksomrade/Type"):
        if tmp.text == "Tillatt Organisasjon":
            result.publishable = True
        # <StedType> tell us how an OU can be used. This information is
        # represented in Cerebrum with the help of spreads and can be
        # accessed via the EntitySpread interface.
        result.add_usage_code(tmp.text)

    celems = element.findall("Kommunikasjon")
    for sub in celems:
        ct = self._make_contact(sub)
        if ct:
            result.add_contact(ct)

    # We require an OU to have a name.
    # Ideally, the information about expired OUs should be complete as
    # well, but we won't be this lucky in our lifetimes. So, for expired
    # OUs we won't care about the names.
    # Neither do we care about the missing names of not yet active
    # OUs; we choose to hope that the names will be in place when
    # the OU becomes active.
    if result.get_name(DataOU.NAME_LONG) is None:
        ou_no_sko_str = result.get_id(DataOU.NO_SKO)
        if not ou_no_sko_str:
            ou_no_sko_str = 'Missing a valid NO_SKO value'
        if result.end_date and result.end_date < now():
            if self.logger:
                self.logger.debug("No name for expired OU %s",
                                  ou_no_sko_str)
        elif result.start_date and result.start_date > now():
            if self.logger:
                self.logger.debug("No name for future OU %s",
                                  ou_no_sko_str)
        else:
            if self.logger:
                self.logger.warn("No name available for OU %s",
                                 ou_no_sko_str)
            # NOTE(review): only a currently active, nameless OU is dropped,
            # following the comment above -- confirm the placement of this
            # return against the intended behaviour.
            return None

    return result
def next_object(self, element):
    """Build a DataOU from the XML attributes of ``element``.

    Most of the data is read from attributes on ``element`` itself
    (sko, parent sko, NSD code, activity dates, names, addresses); contact
    information comes from the ``komm`` elements matched by
    ``element.findall("komm")``.

    :param element: XML element describing one OU.
    :return: The populated DataOU.
    :raises: Whatever int() raises when one of the sko attributes is
        missing or not numeric; nothing is caught for those.
    """

    def get_value(element_value):
        return ensure_unicode(element_value, self.encoding)

    def extract(element_attr):
        return get_value(element.get(element_attr, ""))

    result = DataOU()

    # A lot of data is buried in attributes

    # Own ID -- sko
    own_attrs = ("fakultetnr", "instituttnr", "gruppenr")
    own_sko = tuple([int(element.get(attr)) for attr in own_attrs])
    result.add_id(result.NO_SKO, own_sko)

    # Parent ID -- sko
    parent_attrs = ("fakultetnr_for_org_sted",
                    "instituttnr_for_org_sted",
                    "gruppenr_for_org_sted")
    parent_sko = tuple([int(element.get(attr)) for attr in parent_attrs])
    result.parent = (result.NO_SKO, parent_sko)

    # Some weird ID
    if element.get("nsd_kode"):
        result.add_id(result.NO_NSD, get_value(element.get("nsd_kode")))

    # Activity period
    result.start_date = self._make_mxdate(element.get("dato_opprettet"))
    result.end_date = self._make_mxdate(element.get("dato_nedlagt"))

    # Accessibility for catalogues; publishable is left untouched when the
    # attribute is absent or empty.
    if element.get("opprettetmerke_for_oppf_i_kat"):
        result.publishable = True

    # (name kind, candidate attributes handed to _pull_name, language)
    name_sources = (
        (result.NAME_LONG,
         ("stedlangnavn_bokmal", "stedkortnavn_bokmal",
          "stednavnfullt", "stednavn"),
         "nb"),
        (result.NAME_LONG,
         ("stedlangnavn_engelsk", "stedkortnavn_engelsk"),
         "en"),
        (result.NAME_ACRONYM, ("akronym",), "nb"),
        (result.NAME_SHORT, ("forkstednavn",), "nb"),
    )
    for name_kind, candidates, lang in name_sources:
        name_value = self._pull_name(element, *candidates)
        if not name_value:
            continue
        result.add_name(DataName(name_kind, name_value, lang))

    # (attribute infix, address kind)
    address_sources = (
        ("besok", DataAddress.ADDRESS_BESOK),
        ("intern", DataAddress.ADDRESS_POST),
    )
    for xmlkind, address_kind in address_sources:
        postal_code = extract("poststednr_%s_adr" % xmlkind)
        street = None
        if xmlkind == "intern":
            # A non-zero P.O. box combined with a postal code in 300..399
            # (as an integer) replaces the address lines. Unparsable
            # numbers simply fall through to the address lines below.
            try:
                p_o_box = int(extract("stedpostboks"))
                if p_o_box and int(postal_code) // 100 == 3:
                    street = "Postboks %d Blindern" % p_o_box
            except ValueError:
                pass

        if street is None:
            street = (extract("adresselinje1_%s_adr" % xmlkind),
                      extract("adresselinje2_%s_adr" % xmlkind))

        result.add_address(
            DataAddress(kind=address_kind,
                        street=street,
                        zip=postal_code,
                        city=extract("poststednavn_%s_adr" % xmlkind),
                        country=extract("landnavn_%s_adr" % xmlkind)))

    # FIXME: priority assignment is a bit random at the moment.
    priority = 0
    for komm in element.findall("komm"):
        contact = self._make_contact(komm)
        if not contact:
            continue
        contact_kind, contact_value = contact
        result.add_contact(
            DataContact(contact_kind, get_value(contact_value), priority))
        priority += 1

    return result
def next_object(self, element):
    """Return the next DataOU object.

    Walk ``element`` and all of its sub-elements and copy the recognised
    tags into a fresh DataOU:

    - <Stedkode>: parsed with make_sko() and registered as an ID of the
      type given by ``self.tag2type``; an unparsable value is logged.
    - <Overordnetstedkode>: parsed with make_sko() and stored as the parent.
    - <Navn>: every name yielded by ``self._make_names()`` is added.
    - <Adresse>: converted with ``self._make_address()``.
    - <Startdato> / <Sluttdato>: parsed as ``%Y-%m-%d`` dates.

    Usage codes are then read from ``.//Bruksomrade/Type`` and contact
    information from the elements matched by
    ``element.findall("Kommunikasjon")``.

    :param element: XML element describing one OU.
    :return:
        The populated DataOU, or None when the OU has no NAME_LONG name and
        is neither expired nor not yet active.
    """
    result = DataOU()

    # IVR 2007-12-24 FIXME: One of the attributes is special, and tags the
    # OU's intended usage code (bruksområde). Find out which attribute
    # this is.
    # Iterate over *all* subelements. iter() is used because getiterator()
    # is deprecated and no longer exists in xml.etree since Python 3.9.
    for sub in element.iter():
        value = None
        if sub.text:
            value = ensure_unicode(sub.text.strip(), self.encoding)

        if sub.tag == "Stedkode":
            sko = make_sko(value)
            if sko is not None:
                result.add_id(self.tag2type[sub.tag], sko)
            elif self.logger:
                # invalid value for the <Stedkode> tag
                self.logger.warn(
                    'Detected XML <Stedkode> '
                    'tag with invalid value: %s',
                    value
                )
        elif sub.tag == "Overordnetstedkode":
            sko = make_sko(value)
            if sko is not None:
                result.parent = (result.NO_SKO, sko)
        elif sub.tag == "Navn":
            for name in self._make_names(sub):
                result.add_name(name)
        elif sub.tag in ("Adresse",):
            result.add_address(self._make_address(sub))
        elif sub.tag in ("Startdato", "Sluttdato"):
            # The raw text (not the stripped value) is parsed here.
            date = self._make_mxdate(sub.text, format="%Y-%m-%d")
            if sub.tag == "Startdato":
                result.start_date = date
            else:
                result.end_date = date

    # Whether the OU can be published in various online directories
    result.publishable = False
    for tmp in element.findall(".//Bruksomrade/Type"):
        if tmp.text == "Tillatt Organisasjon":
            result.publishable = True
        # <StedType> tell us how an OU can be used. This information is
        # represented in Cerebrum with the help of spreads and can be
        # accessed via the EntitySpread interface.
        result.add_usage_code(tmp.text)

    for sub in element.findall("Kommunikasjon"):
        ct = self._make_contact(sub)
        if ct:
            result.add_contact(ct)

    # We require an OU to have a name.
    # Ideally, the information about expired OUs should be complete as
    # well, but we won't be this lucky in our lifetimes. So, for expired
    # OUs we won't care about the names.
    # Neither do we care about the missing names of not yet active
    # OUs; we choose to hope that the names will be in place when
    # the OU becomes active.
    if result.get_name(DataOU.NAME_LONG) is not None:
        return result

    ou_no_sko_str = result.get_id(DataOU.NO_SKO)
    if not ou_no_sko_str:
        ou_no_sko_str = 'Missing a valid NO_SKO value'

    if result.end_date and result.end_date < now():
        if self.logger:
            self.logger.debug("No name for expired OU %s", ou_no_sko_str)
        return result

    if result.start_date and result.start_date > now():
        if self.logger:
            self.logger.debug("No name for future OU %s", ou_no_sko_str)
        return result

    if self.logger:
        self.logger.warn("No name available for OU %s", ou_no_sko_str)
    # NOTE(review): only a currently active, nameless OU is dropped,
    # following the comment above -- confirm against the intended behaviour.
    return None
def next_object(self, element):
    """Convert ``element`` into a DataOU.

    IDs, dates, names and addresses are read from attributes on
    ``element``; contact entries are read from the ``komm`` elements
    matched by ``element.findall("komm")`` and numbered in the order they
    are accepted.

    :param element: XML element describing one OU.
    :return: The populated DataOU.
    """

    def get_value(element_value):
        return ensure_unicode(element_value, self.encoding)

    def extract(element_attr):
        return get_value(element.get(element_attr, ""))

    def read_sko(attr_names):
        # int() errors for missing/non-numeric attributes are not caught.
        return tuple([int(element.get(attr)) for attr in attr_names])

    def post_box_street(postcode):
        # "Postboks" line for the internal address, or None when the box
        # number is zero, the postal code is outside 300..399, or either
        # value is not an integer.
        try:
            box_number = int(extract("stedpostboks"))
            if box_number and int(postcode) // 100 == 3:
                return "Postboks %d Blindern" % box_number
        except ValueError:
            pass
        return None

    result = DataOU()

    # A lot of data is buried in attributes

    # Own ID -- sko
    result.add_id(
        result.NO_SKO,
        read_sko(("fakultetnr", "instituttnr", "gruppenr")))

    # Parent ID -- sko
    result.parent = (
        result.NO_SKO,
        read_sko(("fakultetnr_for_org_sted",
                  "instituttnr_for_org_sted",
                  "gruppenr_for_org_sted")))

    # Some weird ID
    if element.get("nsd_kode"):
        result.add_id(result.NO_NSD, get_value(element.get("nsd_kode")))

    # Activity period
    result.start_date = self._make_mxdate(element.get("dato_opprettet"))
    result.end_date = self._make_mxdate(element.get("dato_nedlagt"))

    # Accessibility for catalogues
    if element.get("opprettetmerke_for_oppf_i_kat"):
        result.publishable = True

    # Names: each row is (kind, candidate attributes, language).
    for name_kind, candidates, lang in (
            (result.NAME_LONG,
             ("stedlangnavn_bokmal", "stedkortnavn_bokmal",
              "stednavnfullt", "stednavn"),
             "nb"),
            (result.NAME_LONG,
             ("stedlangnavn_engelsk", "stedkortnavn_engelsk"),
             "en"),
            (result.NAME_ACRONYM, ("akronym", ), "nb"),
            (result.NAME_SHORT, ("forkstednavn", ), "nb")):
        pulled = self._pull_name(element, *candidates)
        if pulled:
            result.add_name(DataName(name_kind, pulled, lang))

    # Addresses: visiting ("besok") and internal/postal ("intern").
    for xmlkind, address_kind in (("besok", DataAddress.ADDRESS_BESOK),
                                  ("intern", DataAddress.ADDRESS_POST)):
        postcode = extract("poststednr_%s_adr" % xmlkind)

        if xmlkind == "intern":
            street = post_box_street(postcode)
        else:
            street = None
        if street is None:
            street = (extract("adresselinje1_%s_adr" % xmlkind),
                      extract("adresselinje2_%s_adr" % xmlkind))

        address = DataAddress(
            kind=address_kind,
            street=street,
            zip=postcode,
            city=extract("poststednavn_%s_adr" % xmlkind),
            country=extract("landnavn_%s_adr" % xmlkind))
        result.add_address(address)

    # FIXME: priority assignment is a bit random at the moment.
    next_priority = 0
    for komm_element in element.findall("komm"):
        made = self._make_contact(komm_element)
        if made:
            contact_kind, raw_value = made
            result.add_contact(
                DataContact(contact_kind, get_value(raw_value),
                            next_priority))
            next_priority += 1

    return result