def test_we_dont_make_multiple_links(self):
    """Two public-domain issues must share a single Copyright record."""
    self.assertEqual(Copyright.objects.count(), 0)

    first = Issue(date_issued=datetime.date(1800, 1, 1))
    first_link = first.copyright_link
    # Accessing copyright_link lazily created exactly one Copyright row.
    self.assertEqual(Copyright.objects.count(), 1)

    # (An unused `Copyright.objects.all()[0]` fetch was removed as dead code.)
    second = Issue(date_issued=datetime.date(1801, 1, 1))
    second_link = second.copyright_link

    # Still one row: the second issue reused the existing record.
    self.assertEqual(Copyright.objects.count(), 1)
    self.assertEqual(first_link.uri, second_link.uri)
    self.assertEqual(first_link.label, second_link.label)
def handle(self, *args, **options):
    """Load issues from a JSON dump file into the database.

    args[0] is the path of the JSON file to load; args[1] is the pk of
    the Type assigned to every imported issue.

    Raises:
        CommandError: if exactly two positional arguments are not given.
    """
    if len(args) != 2:
        raise CommandError(
            'You must specify a file to dump and the pk of the type')
    with open(args[0], 'r') as f:
        issues = json.load(f)  # parse directly from the file object
    issue_type = Type.objects.get(pk=args[1])  # renamed: don't shadow builtin `type`
    for issue_data in issues:
        try:
            Issue(
                lat=issue_data['lat'],
                lon=issue_data['lon'],
                description=issue_data['description'],
                type=issue_type,
            ).save()
        except (IntegrityError, pygeolib.GeocoderError):
            # Best-effort import: skip rows that collide or fail geocoding.
            pass
    # Fixed grammar in the status message ("dump" -> "dumped").
    self.stdout.write('Successfully dumped file to the database')
def test_non_pd_issue_has_no_copyright_link(self):
    """An issue still under copyright yields no link and persists nothing."""
    # Earliest possible date for an issue that is NOT public domain.
    earliest_non_pd = datetime.date(datetime.date.today().year - 95, 1, 1)
    issue = Issue(date_issued=earliest_non_pd)
    link = issue.copyright_link
    # No copyright object is returned...
    self.assertIsNone(link)
    # ...and no Copyright row was created in the database.
    self.assertEqual(Copyright.objects.count(), 0)
def update_latest(self, item, spider):
    """Update the latest issues in a collection."""
    source = Source.objects.get(spider=spider.name.lower())
    try:
        manga = Manga.objects.get(url=item['url'], source=source)
    except Manga.DoesNotExist:
        # Unknown manga: ignore it here, the next full sync will create it.
        return
    existing = manga.issue_set
    for issue_data in item['issues']:
        if existing.filter(url=issue_data['url']).exists():
            continue
        new_issue = Issue()
        self._update_issue(new_issue, issue_data)
        existing.add(new_issue, bulk=False)
def test_empty_db_pd_issue_has_copyright_link(self):
    """A public-domain issue lazily creates exactly one Copyright row."""
    self.assertEqual(Copyright.objects.count(), 0)
    # Latest possible date for an issue that is still public domain.
    latest_pd = datetime.date(datetime.date.today().year - 96, 12, 31)
    issue = Issue(date_issued=latest_pd)
    link = issue.copyright_link
    self.assertEqual(Copyright.objects.count(), 1)
    stored = Copyright.objects.all()[0]
    # The value returned on the fly must match what was persisted.
    self.assertEqual(link.label, stored.label)
    self.assertEqual(link.uri, stored.uri)
    # Label and URI must be non-trivial strings; exact values aren't pinned.
    self.assertTrue(len(link.label) > 10)
    self.assertTrue(len(link.uri) > 10)
def _load_issue(self, mets_file):
    """Build an Issue from an issue-level METS file and resolve its Title.

    NOTE(review): as visible here this variant ends after resolving the
    Title — confirm against the fuller loader variant in this file.
    """
    _logger.debug("parsing issue mets file: %s" % mets_file)
    doc = etree.parse(mets_file)
    # get the mods for the issue
    div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
    dmdid = div.attrib['DMDID']
    mods = dmd_mods(doc, dmdid)
    # set up a new Issue
    issue = Issue()
    issue.volume = mods.xpath(
        'string(.//mods:detail[@type="volume"]/mods:number[1])',
        namespaces=ns).strip()
    issue.number = mods.xpath(
        'string(.//mods:detail[@type="issue"]/mods:number[1])',
        namespaces=ns).strip()
    issue.edition = int(mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:number[1])',
        namespaces=ns))
    issue.edition_label = mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:caption[1])',
        namespaces=ns).strip()
    # parse issue date
    date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
    issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')
    # attach the Issue to the appropriate Title
    lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                      namespaces=ns).strip()
    try:
        title = Title.objects.get(lccn=lccn)
    except Exception:
        # Title not loaded yet: fetch its MARC record and retry once.
        # Fixed the Python 2 `except Exception, e:` syntax (invalid in
        # Python 3) and switched from the root logger to the module
        # `_logger` used everywhere else in this function.
        url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
        _logger.info("attempting to load marc record from %s", url)
        management.call_command('load_titles', url)
        title = Title.objects.get(lccn=lccn)
def save_city(self, city):
    """Fetch reported incidents for *city* and persist each as an Issue."""
    # Category collections mapped to the Type pk they select; anything
    # unmatched falls through to pk 9 (checked in the original order).
    category_pks = (
        (self.crash, 5),
        (self.disturb, 4),
        (self.noise_polution, 2),
        (self.vandalism, 1),
    )
    # Loop variable renamed: the original shadowed the builtin `object`.
    for record in self.retrieve_city(city):
        # Guard clause: skip records missing an id or coordinates.
        if 'id' not in record or not record['latitud'] or not record['longitud']:
            continue
        issue = Issue()
        issue.lat = float(record['latitud'])
        issue.lon = float(record['longitud'])
        issue.address = record['direccion'] or ' '
        issue.description = record['desperfectoTexto'].replace(
            "Tipo de incidencia: ", '') or ' '
        type_pk = 9
        for categories, pk in category_pks:
            if record['desperfecto'] in categories:
                type_pk = pk
                break
        issue.type = models.Type.objects.get(pk=type_pk)
        issue.save()
def _load_issue(self, mets_file):
    """Parse an issue-level METS file; save and return the resulting Issue.

    Attaches the Issue to its Title (loading the MARC record on demand),
    then persists its notes and pages.
    """
    _logger.debug("parsing issue mets file: %s" % mets_file)
    doc = etree.parse(mets_file)
    # get the mods for the issue
    div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
    dmdid = div.attrib['DMDID']
    mods = dmd_mods(doc, dmdid)
    # set up a new Issue
    issue = Issue()
    issue.volume = mods.xpath(
        'string(.//mods:detail[@type="volume"]/mods:number[1])',
        namespaces=ns).strip()
    issue.number = mods.xpath(
        'string(.//mods:detail[@type="issue"]/mods:number[1])',
        namespaces=ns).strip()
    issue.edition = int(mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:number[1])',
        namespaces=ns))
    issue.edition_label = mods.xpath(
        'string(.//mods:detail[@type="edition"]/mods:caption[1])',
        namespaces=ns).strip()
    # parse issue date
    date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
    issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')
    # attach the Issue to the appropriate Title
    lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                      namespaces=ns).strip()
    try:
        title = Title.objects.get(lccn=lccn)
    except Exception:
        # Title not loaded yet: pull its MARC record and retry once.
        # (The bound exception variable was unused and has been dropped.)
        url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
        _logger.info("attempting to load marc record from %s", url)
        management.call_command('load_titles', url)
        title = Title.objects.get(lccn=lccn)
    issue.title = title
    issue.batch = self.current_batch
    issue.save()  # needs a pk before notes/pages can reference it
    _logger.debug("saved issue: %s" % issue.url)
    notes = []
    for mods_note in mods.xpath('.//mods:note', namespaces=ns):
        # Local renamed from `type` to avoid shadowing the builtin; the
        # `type=` keyword below is the IssueNote model field and is kept.
        note_type = mods_note.xpath('string(./@type)')
        label = mods_note.xpath('string(./@displayLabel)')
        text = mods_note.xpath('string(.)')
        notes.append(models.IssueNote(type=note_type, label=label, text=text))
    issue.notes.set(notes, bulk=False)
    issue.save()
    # attach pages: lots of logging because it's expensive
    for page_div in div.xpath('.//mets:div[@TYPE="np:page"]', namespaces=ns):
        try:
            # Return value was never used; count the page and move on.
            self._load_page(doc, page_div, issue)
            self.pages_processed += 1
        except BatchLoaderException as e:
            _logger.exception(e)
    return issue