Ejemplo n.º 1
0
    def _load_issue(self, mets_file):
        _logger.debug("parsing issue mets file: %s" % mets_file)
        doc = etree.parse(mets_file)

        # get the mods for the issue
        div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)

        # set up a new Issue
        issue = Issue()
        issue.volume = mods.xpath(
            'string(.//mods:detail[@type="volume"]/mods:number[1])',
            namespaces=ns).strip()
        issue.number = mods.xpath(
            'string(.//mods:detail[@type="issue"]/mods:number[1])',
            namespaces=ns).strip()
        issue.edition = int(
            mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:number[1])',
                namespaces=ns))
        issue.edition_label = mods.xpath(
            'string(.//mods:detail[@type="edition"]/mods:caption[1])',
            namespaces=ns).strip()

        # parse issue date
        date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
        issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

        # attach the Issue to the appropriate Title
        lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                          namespaces=ns).strip()
        try:
            title = Title.objects.get(lccn=lccn)
        except Exception, e:
            url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
            logging.info("attempting to load marc record from %s", url)
            management.call_command('load_titles', url)
            title = Title.objects.get(lccn=lccn)
Ejemplo n.º 2
0
    def _load_issue(self, mets_file):
        _logger.debug("parsing issue mets file: %s" % mets_file)
        doc = etree.parse(mets_file)

        # get the mods for the issue
        div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)

        # set up a new Issue
        issue = Issue()
        issue.volume = mods.xpath(
            'string(.//mods:detail[@type="volume"]/mods:number[1])',
            namespaces=ns).strip()
        issue.number = mods.xpath(
            'string(.//mods:detail[@type="issue"]/mods:number[1])',
            namespaces=ns).strip()
        issue.edition = int(mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:number[1])',
                namespaces=ns))
        issue.edition_label = mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:caption[1])',
                namespaces=ns).strip()

        # parse issue date
        date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
        issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

        # attach the Issue to the appropriate Title
        lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
            namespaces=ns).strip()
        try:
            title = Title.objects.get(lccn=lccn)
        except Exception, e:
            url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
            logging.info("attempting to load marc record from %s", url)
            management.call_command('load_titles', url)
            title = Title.objects.get(lccn=lccn)
Ejemplo n.º 3
0
    def _load_issue(self, mets_file):
        _logger.debug("parsing issue mets file: %s" % mets_file)
        doc = etree.parse(mets_file)

        # get the mods for the issue
        div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)

        # set up a new Issue
        issue = Issue()
        issue.volume = mods.xpath(
            'string(.//mods:detail[@type="volume"]/mods:number[1])',
            namespaces=ns).strip()
        issue.number = mods.xpath(
            'string(.//mods:detail[@type="issue"]/mods:number[1])',
            namespaces=ns).strip()
        issue.edition = int(
            mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:number[1])',
                namespaces=ns))
        issue.edition_label = mods.xpath(
            'string(.//mods:detail[@type="edition"]/mods:caption[1])',
            namespaces=ns).strip()

        # parse issue date
        date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
        issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

        # attach the Issue to the appropriate Title
        lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                          namespaces=ns).strip()
        try:
            title = Title.objects.get(lccn=lccn)
        except Exception as e:
            url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
            _logger.info("attempting to load marc record from %s", url)
            management.call_command('load_titles', url)
            title = Title.objects.get(lccn=lccn)

        issue.title = title

        issue.batch = self.current_batch
        issue.save()
        _logger.debug("saved issue: %s" % issue.url)

        notes = []
        for mods_note in mods.xpath('.//mods:note', namespaces=ns):
            type = mods_note.xpath('string(./@type)')
            label = mods_note.xpath('string(./@displayLabel)')
            text = mods_note.xpath('string(.)')
            note = models.IssueNote(type=type, label=label, text=text)
            notes.append(note)
        issue.notes.set(notes, bulk=False)
        issue.save()

        # attach pages: lots of logging because it's expensive
        for page_div in div.xpath('.//mets:div[@TYPE="np:page"]',
                                  namespaces=ns):
            try:
                page = self._load_page(doc, page_div, issue)
                self.pages_processed += 1
            except BatchLoaderException as e:
                _logger.exception(e)

        return issue