def _load_issue(self, mets_file):
    """Parse an issue METS file into an ``Issue`` and look up its ``Title``.

    The MODS record referenced by the ``np:issue`` div supplies the volume,
    number, edition, edition label and issue date. The title is looked up
    by LCCN; when it is not in the database yet, its MARC record is loaded
    through the ``load_titles`` management command and the lookup is
    retried once.

    Args:
        mets_file: source of the issue METS document, passed straight to
            ``etree.parse``.

    Raises:
        IndexError: if the document contains no ``np:issue`` div.
        KeyError: if that div has no ``DMDID`` attribute.
        ValueError: if the edition number or the issue date cannot be
            parsed.
    """
    _logger.debug("parsing issue mets file: %s", mets_file)
    doc = etree.parse(mets_file)

    # get the mods for the issue
    div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
    dmdid = div.attrib['DMDID']
    mods = dmd_mods(doc, dmdid)

    # set up a new Issue
    issue = Issue()
    issue.volume = mods.xpath(
        'string(.//mods:detail[@type="volume"]/mods:number[1])',
        namespaces=ns).strip()
    issue.number = mods.xpath(
        'string(.//mods:detail[@type="issue"]/mods:number[1])',
        namespaces=ns).strip()
    issue.edition = int(mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:number[1])',
        namespaces=ns))
    issue.edition_label = mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:caption[1])',
        namespaces=ns).strip()

    # parse issue date
    date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
    issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

    # attach the Issue to the appropriate Title
    lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                      namespaces=ns).strip()
    try:
        title = Title.objects.get(lccn=lccn)
    except Title.DoesNotExist:
        # Unknown title: fetch its MARC record, then retry the lookup once.
        # A second miss propagates to the caller.
        url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
        _logger.info("attempting to load marc record from %s", url)
        management.call_command('load_titles', url)
        title = Title.objects.get(lccn=lccn)
    # NOTE(review): the definition visible here stops after the title lookup;
    # `issue` is neither linked to `title`, saved, nor returned -- confirm
    # against the complete version of this method.
def _load_issue(self, mets_file):
    """Build an ``Issue`` from an issue METS file and resolve its ``Title``.

    Volume, number, edition, edition label and issue date are read from the
    MODS record that the ``np:issue`` div points to via its ``DMDID``. The
    title is fetched by LCCN; if it is missing, the ``load_titles``
    management command is run against the MARC retrieval URL and the fetch
    is attempted a second time.

    Args:
        mets_file: source of the issue METS document, handed directly to
            ``etree.parse``.

    Raises:
        IndexError: if no ``np:issue`` div is present.
        KeyError: if the issue div lacks a ``DMDID`` attribute.
        ValueError: if the edition number or the issue date does not parse.
    """
    _logger.debug("parsing issue mets file: %s", mets_file)
    doc = etree.parse(mets_file)

    # get the mods for the issue
    div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
    dmdid = div.attrib['DMDID']
    mods = dmd_mods(doc, dmdid)

    # set up a new Issue
    issue = Issue()
    issue.volume = mods.xpath(
        'string(.//mods:detail[@type="volume"]/mods:number[1])',
        namespaces=ns).strip()
    issue.number = mods.xpath(
        'string(.//mods:detail[@type="issue"]/mods:number[1])',
        namespaces=ns).strip()
    issue.edition = int(mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:number[1])',
        namespaces=ns))
    issue.edition_label = mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:caption[1])',
        namespaces=ns).strip()

    # parse issue date
    date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
    issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

    # attach the Issue to the appropriate Title
    lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                      namespaces=ns).strip()
    try:
        title = Title.objects.get(lccn=lccn)
    except Title.DoesNotExist:
        # The title has not been loaded yet: pull its MARC record and look
        # it up again. If it is still missing, the error reaches the caller.
        url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
        _logger.info("attempting to load marc record from %s", url)
        management.call_command('load_titles', url)
        title = Title.objects.get(lccn=lccn)
    # NOTE(review): this definition ends right after the title is resolved;
    # nothing here assigns `title` to `issue`, saves the issue, or returns
    # it -- verify against the full method before relying on it.
def _load_issue(self, mets_file):
    """Parse an issue METS file, persist the issue and load its pages.

    Steps, in order:

    1. Read the MODS record referenced by the ``np:issue`` div and fill a
       new ``Issue`` (volume, number, edition, edition label, issue date).
    2. Look the ``Title`` up by LCCN. If it is not in the database, run the
       ``load_titles`` management command against the MARC retrieval URL
       and look it up again.
    3. Attach the title and ``self.current_batch``, then save the issue.
    4. Store each ``mods:note`` as an ``IssueNote`` on the issue.
    5. Load every ``np:page`` div through ``self._load_page``, incrementing
       ``self.pages_processed`` for each page that loads.

    Args:
        mets_file: source of the issue METS document, passed straight to
            ``etree.parse``.

    Returns:
        The saved ``Issue``.

    Raises:
        IndexError: if the document contains no ``np:issue`` div.
        KeyError: if that div has no ``DMDID`` attribute.
        ValueError: if the edition number or the issue date cannot be
            parsed.
    """
    _logger.debug("parsing issue mets file: %s", mets_file)
    doc = etree.parse(mets_file)

    # get the mods for the issue
    div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
    dmdid = div.attrib['DMDID']
    mods = dmd_mods(doc, dmdid)

    # set up a new Issue
    issue = Issue()
    issue.volume = mods.xpath(
        'string(.//mods:detail[@type="volume"]/mods:number[1])',
        namespaces=ns).strip()
    issue.number = mods.xpath(
        'string(.//mods:detail[@type="issue"]/mods:number[1])',
        namespaces=ns).strip()
    issue.edition = int(mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:number[1])',
        namespaces=ns))
    issue.edition_label = mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:caption[1])',
        namespaces=ns).strip()

    # parse issue date
    date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
    issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

    # attach the Issue to the appropriate Title
    lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                      namespaces=ns).strip()
    try:
        title = Title.objects.get(lccn=lccn)
    except Title.DoesNotExist:
        # Unknown title: fetch its MARC record, then retry the lookup once.
        # A second miss propagates to the caller.
        url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
        _logger.info("attempting to load marc record from %s", url)
        management.call_command('load_titles', url)
        title = Title.objects.get(lccn=lccn)

    issue.title = title
    issue.batch = self.current_batch
    # The issue is saved before notes and pages are attached to it.
    issue.save()
    _logger.debug("saved issue: %s", issue.url)

    notes = [
        models.IssueNote(
            type=mods_note.xpath('string(./@type)'),
            label=mods_note.xpath('string(./@displayLabel)'),
            text=mods_note.xpath('string(.)'),
        )
        for mods_note in mods.xpath('.//mods:note', namespaces=ns)
    ]
    issue.notes.set(notes, bulk=False)
    issue.save()

    # attach pages: lots of logging because it's expensive
    for page_div in div.xpath('.//mets:div[@TYPE="np:page"]', namespaces=ns):
        try:
            self._load_page(doc, page_div, issue)
        except BatchLoaderException as e:
            # A page that fails to load is logged and skipped; the
            # remaining pages are still attempted.
            _logger.exception(e)
        else:
            self.pages_processed += 1

    return issue