Example #1
0
def _title_range(reel):
    """Return the span of issue dates found on ``reel`` as display text.

    The earliest and latest ``date_issued`` values are aggregated over the
    distinct issues matched by ``pages__reel=reel``.  The result looks like
    ``"<earliest> - <latest>"`` with each date rendered using ``'%b %d, %Y'``.
    An empty string is returned when either bound is falsy (for example,
    when no issue matches).
    """
    issues_on_reel = models.Issue.objects.filter(pages__reel=reel).distinct()
    bounds = issues_on_reel.aggregate(
        mn=Min('date_issued'), mx=Max('date_issued'))

    # Guard clause: without both ends there is no range to show.
    if not (bounds['mn'] and bounds['mx']):
        return ""

    date_format = '%b %d, %Y'
    earliest = strftime_safe(bounds['mn'], date_format)
    latest = strftime_safe(bounds['mx'], date_format)
    return "%s - %s" % (earliest, latest)
Example #2
0
 def __str__(self):
     """Return a comma-separated, human-readable description of this object.

     The pieces, in order, are: the issue's title, the issue date formatted
     as ``'%B %d, %Y'``, the issue's edition label (if truthy), this
     object's section label (if truthy), and ``'Image <sequence>'``.
     """
     issue = self.issue
     pieces = [
         '%s' % issue.title,
         strftime_safe(issue.date_issued, '%B %d, %Y'),
     ]
     # Optional labels are only included when they are truthy.
     pieces.extend(
         text for text in (issue.edition_label, self.section_label) if text)
     pieces.append('Image %s' % self.sequence)
     return ', '.join(pieces)
Example #3
0
def label(instance):
    """Build a short display label for ``instance`` based on its model type.

    * ``models.Title``: ``"<display_name> (<place_of_publication>)
      <start_year>-<end_year>"``.
    * ``models.Issue``: the issue date formatted as ``'%B %d, %Y'``,
      followed by the edition label when it is truthy.
    * ``models.Page``: the section label and ``'Page <number>'`` when those
      values are truthy, always followed by ``'Image <sequence>'``.
    * Anything else: the result of ``"%s" % instance``.

    Multi-part labels are joined with ``', '``.
    """
    if isinstance(instance, models.Title):
        title_fields = (instance.display_name,
                        instance.place_of_publication,
                        instance.start_year, instance.end_year)
        return '%s (%s) %s-%s' % title_fields

    if isinstance(instance, models.Issue):
        pieces = [strftime_safe(instance.date_issued, '%B %d, %Y')]
        if instance.edition_label:
            pieces.append("%s" % instance.edition_label)
        return ', '.join(pieces)

    if isinstance(instance, models.Page):
        pieces = []
        if instance.section_label:
            pieces.append(instance.section_label)
        if instance.number:
            pieces.append('Page %s' % instance.number)
        # The image sequence is always the final component.
        pieces.append('Image %s' % instance.sequence)
        return ', '.join(pieces)

    # Unknown types fall back to their plain string form.
    return "%s" % instance
Example #4
0
    def json(self, host, serialize=True):
        """Describe this object as an ``sc:Collection`` structure.

        The collection carries this object's ``display_name`` as its label,
        its ``metadata``, and one ``sc:Manifest`` entry per related issue
        (labelled with the issue date as ``'%Y-%m-%d'``).  Identifiers are
        built by prefixing ``settings.BASE_URL`` to the relevant
        ``json_url``.

        ``host`` is accepted but not used by this method body.

        Returns a JSON string indented by two spaces when ``serialize`` is
        true; otherwise returns the underlying dict.
        """
        collection = {
            "@context": "http://iiif.io/api/presentation/2/context.json",
            "@id": settings.BASE_URL + self.json_url,
            "@type": "sc:Collection",
            "label": self.display_name,
            "manifests": [],
            "metadata": self.metadata
        }

        # Fill the manifest list in place, one entry per issue.
        collection["manifests"].extend(
            {
                "@id": settings.BASE_URL + issue.json_url,
                "@type": "sc:Manifest",
                "label": strftime_safe(issue.date_issued, '%Y-%m-%d'),
            }
            for issue in self.issues.all()
        )

        if not serialize:
            return collection
        return json.dumps(collection, indent=2)
    def test_load_batch(self):
        """Load the sample batch, verify what was stored, then purge it.

        Steps:
          1. Extract ``testbatch.tgz`` into ``BatchLoaderTest.batchDir`` and
             point ``settings.BATCH_STORAGE`` at that directory.
          2. Load ``batch_oru_testbatch_ver01`` with OCR processing disabled
             and check the resulting batch, title, first issue, first page
             and page notes.
          3. Run OCR processing for the first page only and check the OCR
             text and the page's ``solr_doc``.
          4. Purge the batch and confirm no batches remain and the title no
             longer reports having issues.
        """
        # Extract the mini-batch tarball into the test's batch directory.
        tarpath = os.path.join(os.path.dirname(core.__file__), 'test-data', 'testbatch.tgz')
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(tarpath) as tar:
            tar.extractall(path=BatchLoaderTest.batchDir)
        settings.BATCH_STORAGE = BatchLoaderTest.batchDir

        batch_dir = os.path.join(BatchLoaderTest.batchDir, "batch_oru_testbatch_ver01")

        loader = BatchLoader(process_ocr=False)
        batch = loader.load_batch(batch_dir)
        self.assertIsInstance(batch, Batch)
        self.assertEqual(batch.name, 'batch_oru_testbatch_ver01')
        self.assertEqual(len(batch.issues.all()), 4)

        title = Title.objects.get(lccn='sn83030214')
        self.assertTrue(title.has_issues)

        issue = batch.issues.all()[0]
        self.assertEqual(issue.volume, '1')
        self.assertEqual(issue.number, '1')
        self.assertEqual(issue.edition, 1)
        self.assertEqual(issue.title.lccn, 'sn83030214')
        self.assertEqual(strftime_safe(issue.date_issued, '%Y-%m-%d'), '1999-06-15')
        self.assertEqual(len(issue.pages.all()), 15)

        page = issue.pages.all()[0]
        self.assertEqual(page.sequence, 1)
        self.assertEqual(page.url, '/lccn/sn83030214/1999-06-15/ed-1/seq-1/')

        # NOTE(review): the queryset ordered by "type" is only used for the
        # count; the per-note checks below re-query without an explicit
        # order_by, so they rely on the default ordering -- confirm that is
        # intended.
        notes = page.notes.order_by("type").all()
        self.assertEqual(len(notes), 2)
        note = page.notes.all()[0]
        self.assertEqual(note.type, "noteAboutReproduction")
        self.assertEqual(note.text, "Present")
        note = page.notes.all()[1]
        self.assertEqual(note.type, "agencyResponsibleForReproduction")
        self.assertEqual(note.text, "oru")

        # Validate page 1's metadata
        self.assertEqual(page.sequence, 1)
        self.assertEqual(page.jp2_filename, 'sn83030214/print/1999061501/0001.jp2')
        self.assertEqual(page.jp2_length, 411)
        self.assertEqual(page.jp2_width, 411)
        self.assertEqual(page.ocr_filename, 'sn83030214/print/1999061501/0001.xml')
        self.assertEqual(page.pdf_filename, 'sn83030214/print/1999061501/0001.pdf')

        # extract ocr data just for this page
        loader.process_ocr(page, index=False)
        self.assertIsNotNone(page.ocr)
        self.assertGreater(len(page.ocr.text), 0)

        # Re-fetch the same page through the title to check the OCR is
        # reachable from a fresh lookup as well.
        p = Title.objects.get(lccn='sn83030214').issues.all()[0].pages.all()[0]
        self.assertIsNotNone(p.ocr)

        # check that the solr_doc looks legit
        solr_doc = page.solr_doc
        self.assertEqual(solr_doc['id'], '/lccn/sn83030214/1999-06-15/ed-1/seq-1/')
        self.assertEqual(solr_doc['type'], 'page')
        self.assertEqual(solr_doc['sequence'], 1)
        self.assertEqual(solr_doc['lccn'], 'sn83030214')
        self.assertEqual(solr_doc['title'], 'New-York tribune.')
        self.assertEqual(solr_doc['date'], '19990615')
        self.assertEqual(solr_doc['batch'], 'batch_oru_testbatch_ver01')
        self.assertEqual(solr_doc['subject'], [
            'New York (N.Y.)--Newspapers.',
            'New York County (N.Y.)--Newspapers.'])
        self.assertEqual(solr_doc['place'], [
            'New York--Brooklyn--New York City',
            'New York--Queens--New York City'])
        self.assertEqual(solr_doc['note'], [
            "I'll take Manhattan",
            'The Big Apple'])
        self.assertNotIn('essay', solr_doc)
        self.assertEqual(solr_doc['ocr_eng'], 'LCCNsn83030214Page1')

        # purge the batch and make sure it's gone from the db
        loader.purge_batch('batch_oru_testbatch_ver01')
        self.assertEqual(Batch.objects.all().count(), 0)
        self.assertEqual(Title.objects.get(lccn='sn83030214').has_issues, False)