def _title_range(reel):
    """Return the span of issue dates on ``reel`` as display text.

    Aggregates the minimum and maximum ``date_issued`` over the distinct
    issues matched by ``pages__reel=reel``. The result is
    ``"<earliest> - <latest>"`` with both dates rendered as ``%b %d, %Y``,
    or an empty string when either bound is missing/falsy.
    """
    issues_on_reel = models.Issue.objects.filter(pages__reel=reel).distinct()
    bounds = issues_on_reel.aggregate(
        mn=Min('date_issued'), mx=Max('date_issued'))
    earliest, latest = bounds['mn'], bounds['mx']

    # Nothing to show unless both ends of the range are known.
    if not (earliest and latest):
        return ""

    date_format = '%b %d, %Y'
    return "%s - %s" % (
        strftime_safe(earliest, date_format),
        strftime_safe(latest, date_format),
    )
def __str__(self):
    """Describe this object as a comma-separated, human-readable string.

    The pieces, in order: the issue title, the issue date formatted as
    ``%B %d, %Y``, the issue's edition label (if truthy), this object's
    section label (if truthy), and ``Image <sequence>``.
    """
    issue = self.issue
    pieces = [
        '%s' % issue.title,
        strftime_safe(issue.date_issued, '%B %d, %Y'),
    ]
    # Optional labels are only included when they are non-empty.
    pieces.extend(
        optional_label
        for optional_label in (issue.edition_label, self.section_label)
        if optional_label
    )
    pieces.append('Image %s' % self.sequence)
    return ', '.join(pieces)
def label(instance):
    """Build a short display label for a Title, Issue, or Page.

    * ``models.Title``: ``"<display_name> (<place_of_publication>)
      <start_year>-<end_year>"``.
    * ``models.Issue``: the issue date (``%B %d, %Y``), followed by the
      edition label when it is truthy, joined with ``", "``.
    * ``models.Page``: the section label and ``Page <number>`` (each only
      when truthy), then ``Image <sequence>``, joined with ``", "``.
    * Anything else: the ``%s`` string formatting of ``instance``.
    """
    if isinstance(instance, models.Title):
        return '%s (%s) %s-%s' % (
            instance.display_name,
            instance.place_of_publication,
            instance.start_year,
            instance.end_year,
        )

    if isinstance(instance, models.Issue):
        pieces = [strftime_safe(instance.date_issued, '%B %d, %Y')]
        edition = instance.edition_label
        if edition:
            pieces.append("%s" % edition)
        return ', '.join(pieces)

    if isinstance(instance, models.Page):
        pieces = []
        section = instance.section_label
        if section:
            pieces.append(section)
        page_number = instance.number
        if page_number:
            pieces.append('Page %s' % page_number)
        # The image sequence is always present in the label.
        pieces.append('Image %s' % instance.sequence)
        return ', '.join(pieces)

    # Unknown type: fall back to plain string formatting.
    return "%s" % instance
def json(self, host, serialize=True):
    """Build an IIIF Presentation 2 ``sc:Collection`` for this object.

    The collection carries this object's id (``settings.BASE_URL`` plus
    ``json_url``), display name and metadata, and one ``sc:Manifest`` stub
    per entry in ``self.issues.all()``, each labelled with the issue date
    as ``%Y-%m-%d``.

    :param host: accepted for interface compatibility; not used here.
    :param serialize: when true (the default) return a JSON string
        indented by 2 spaces; otherwise return the underlying dict.
    """
    collection = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": settings.BASE_URL + self.json_url,
        "@type": "sc:Collection",
        "label": self.display_name,
        "manifests": [],
        "metadata": self.metadata,
    }
    # Fill the manifest list after the dict exists so attribute access
    # order and key order match the serialized layout.
    collection["manifests"].extend(
        {
            "@id": settings.BASE_URL + issue.json_url,
            "@type": "sc:Manifest",
            "label": strftime_safe(issue.date_issued, '%Y-%m-%d'),
        }
        for issue in self.issues.all()
    )

    if not serialize:
        return collection
    return json.dumps(collection, indent=2)
def test_load_batch(self):
    """Load the bundled mini-batch, verify what was stored, then purge it.

    Steps: extract ``testbatch.tgz`` into ``BatchLoaderTest.batchDir``,
    load it with OCR processing disabled, check the batch/issue/page/note
    records and the first page's file metadata, run OCR for that one page,
    inspect its ``solr_doc``, and finally purge the batch and confirm the
    title no longer reports issues.
    """
    # Extract the mini-batch tarball into the test's batch directory.
    tarpath = os.path.join(os.path.dirname(core.__file__),
                           'test-data', 'testbatch.tgz')
    # The context manager closes the archive even if extraction raises.
    with tarfile.open(tarpath) as tar:
        tar.extractall(path=BatchLoaderTest.batchDir)
    settings.BATCH_STORAGE = BatchLoaderTest.batchDir
    batch_dir = os.path.join(BatchLoaderTest.batchDir,
                             "batch_oru_testbatch_ver01")

    loader = BatchLoader(process_ocr=False)
    batch = loader.load_batch(batch_dir)
    self.assertIsInstance(batch, Batch)
    self.assertEqual(batch.name, 'batch_oru_testbatch_ver01')
    self.assertEqual(len(batch.issues.all()), 4)

    title = Title.objects.get(lccn='sn83030214')
    self.assertTrue(title.has_issues)

    issue = batch.issues.all()[0]
    self.assertEqual(issue.volume, '1')
    self.assertEqual(issue.number, '1')
    self.assertEqual(issue.edition, 1)
    self.assertEqual(issue.title.lccn, 'sn83030214')
    self.assertEqual(strftime_safe(issue.date_issued, '%Y-%m-%d'),
                     '1999-06-15')
    self.assertEqual(len(issue.pages.all()), 15)

    page = issue.pages.all()[0]
    self.assertEqual(page.sequence, 1)
    self.assertEqual(page.url, '/lccn/sn83030214/1999-06-15/ed-1/seq-1/')

    notes = page.notes.order_by("type").all()
    self.assertEqual(len(notes), 2)
    # NOTE(review): the content checks below index ``page.notes.all()``
    # (default ordering), not the type-ordered ``notes`` queryset above.
    # Sorting by type would put "agencyResponsibleForReproduction" first,
    # so switching to ``notes`` would change which note each index hits.
    note = page.notes.all()[0]
    self.assertEqual(note.type, "noteAboutReproduction")
    self.assertEqual(note.text, "Present")
    note = page.notes.all()[1]
    self.assertEqual(note.type, "agencyResponsibleForReproduction")
    self.assertEqual(note.text, "oru")

    # Validate page 1's metadata
    self.assertEqual(page.sequence, 1)
    self.assertEqual(page.jp2_filename,
                     'sn83030214/print/1999061501/0001.jp2')
    self.assertEqual(page.jp2_length, 411)
    self.assertEqual(page.jp2_width, 411)
    self.assertEqual(page.ocr_filename,
                     'sn83030214/print/1999061501/0001.xml')
    self.assertEqual(page.pdf_filename,
                     'sn83030214/print/1999061501/0001.pdf')

    # extract ocr data just for this page
    loader.process_ocr(page, index=False)
    self.assertIsNotNone(page.ocr)
    self.assertGreater(len(page.ocr.text), 0)

    # Re-fetch the page through the title to confirm the OCR is reachable
    # from a fresh query, not just on the in-memory ``page`` object.
    p = Title.objects.get(lccn='sn83030214').issues.all()[0].pages.all()[0]
    self.assertIsNotNone(p.ocr)

    # check that the solr_doc looks legit
    solr_doc = page.solr_doc
    self.assertEqual(solr_doc['id'],
                     '/lccn/sn83030214/1999-06-15/ed-1/seq-1/')
    self.assertEqual(solr_doc['type'], 'page')
    self.assertEqual(solr_doc['sequence'], 1)
    self.assertEqual(solr_doc['lccn'], 'sn83030214')
    self.assertEqual(solr_doc['title'], 'New-York tribune.')
    self.assertEqual(solr_doc['date'], '19990615')
    self.assertEqual(solr_doc['batch'], 'batch_oru_testbatch_ver01')
    self.assertEqual(solr_doc['subject'], [
        'New York (N.Y.)--Newspapers.',
        'New York County (N.Y.)--Newspapers.'])
    self.assertEqual(solr_doc['place'], [
        'New York--Brooklyn--New York City',
        'New York--Queens--New York City'])
    self.assertEqual(solr_doc['note'], [
        "I'll take Manhattan",
        'The Big Apple'])
    self.assertNotIn('essay', solr_doc)
    self.assertEqual(solr_doc['ocr_eng'], 'LCCNsn83030214Page1')

    # purge the batch and make sure it's gone from the db
    loader.purge_batch('batch_oru_testbatch_ver01')
    self.assertEqual(Batch.objects.all().count(), 0)
    self.assertEqual(Title.objects.get(lccn='sn83030214').has_issues, False)