Ejemplo n.º 1
0
    def test_we_dont_make_multiple_links(self):
        """Check that two issues end up sharing one Copyright row.

        Issues dated 1800 and 1801 each have ``copyright_link`` read; the
        table must hold exactly one row afterwards and both links must
        carry the same URI and label.
        """
        # Start from an empty table so the counts below are meaningful.
        self.assertEqual(Copyright.objects.count(), 0)

        first_issue = Issue(date_issued=datetime.date(1800, 1, 1))
        first_link = first_issue.copyright_link
        self.assertEqual(Copyright.objects.count(), 1)
        # Fetched but not compared; indexing raises if the row is missing.
        stored_link = Copyright.objects.all()[0]

        second_issue = Issue(date_issued=datetime.date(1801, 1, 1))
        second_link = second_issue.copyright_link
        # Reading the link on a second issue must not add another row.
        self.assertEqual(Copyright.objects.count(), 1)

        self.assertEqual(first_link.uri, second_link.uri)
        self.assertEqual(first_link.label, second_link.label)
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Load issues from a JSON dump file into the database.

        Expects exactly two positional arguments: the path of a JSON file
        holding a list of objects with ``lat``, ``lon`` and ``description``
        keys, and the primary key of the ``Type`` assigned to every created
        ``Issue``.

        Records whose save raises ``IntegrityError`` or
        ``pygeolib.GeocoderError`` are skipped; the number skipped is
        reported on stderr.

        Raises:
            CommandError: if the number of positional arguments is not two.
        """
        if len(args) != 2:
            raise CommandError(
                'You must specify a file to dump and the pk of the type')

        dump_path, type_pk = args

        with open(dump_path, 'r') as f:
            issues = json.load(f)

        # Named issue_type so the builtin ``type`` is not shadowed.
        issue_type = Type.objects.get(pk=type_pk)

        skipped = 0
        for issue in issues:
            try:
                Issue(
                    lat=issue['lat'],
                    lon=issue['lon'],
                    description=issue['description'],
                    type=issue_type,
                ).save()
            except (IntegrityError, pygeolib.GeocoderError):
                # Best-effort import: keep going, but count the failure so
                # it is not lost silently.
                skipped += 1

        if skipped:
            self.stderr.write(
                'Skipped %d issue(s) that could not be saved' % skipped)

        self.stdout.write('Successfully dump file to the database')
Ejemplo n.º 3
0
 def test_non_pd_issue_has_no_copyright_link(self):
     """An issue dated Jan 1 of (current year - 95) has no copyright link."""
     # This is the absolute earliest an issue can be non-PD
     earliest_non_pd_year = datetime.date.today().year - 95
     issue = Issue(date_issued=datetime.date(earliest_non_pd_year, 1, 1))
     link = issue.copyright_link
     # No copyright
     self.assertIsNone(link)
     # No link is created in the db
     self.assertEqual(Copyright.objects.count(), 0)
Ejemplo n.º 4
0
    def update_latest(self, item, spider):
        """Add to an existing manga any scraped issues it does not have yet.

        The manga is looked up by ``item['url']`` within the ``Source``
        whose ``spider`` field equals the lower-cased spider name.  Nothing
        is done when no such manga exists.
        """
        source = Source.objects.get(spider=spider.name.lower())
        try:
            manga = Manga.objects.get(url=item['url'], source=source)
        except Manga.DoesNotExist:
            # Not a manga we currently track; skip it here, since it is
            # expected to be created by the next full sync.
            return

        for scraped in item['issues']:
            known = manga.issue_set.filter(url=scraped['url'])
            if known.exists():
                continue
            new_issue = Issue()
            self._update_issue(new_issue, scraped)
            manga.issue_set.add(new_issue, bulk=False)
Ejemplo n.º 5
0
    def test_empty_db_pd_issue_has_copyright_link(self):
        """Reading the link of a public-domain issue stores one Copyright."""
        self.assertEqual(Copyright.objects.count(), 0)

        # This is the absolute latest an issue can be considered public domain
        latest_pd_year = datetime.date.today().year - 96
        issue = Issue(date_issued=datetime.date(latest_pd_year, 12, 31))
        link = issue.copyright_link
        self.assertEqual(Copyright.objects.count(), 1)
        stored = Copyright.objects.all()[0]

        # The returned object is created on the fly, so confirm it matches
        # what actually landed in the DB.
        self.assertEqual(link.label, stored.label)
        self.assertEqual(link.uri, stored.uri)

        # Label and URI must not be empty.  The expected values are not
        # hard-coded; we only want to be sure they exist.
        self.assertGreater(len(link.label), 10)
        self.assertGreater(len(link.uri), 10)
Ejemplo n.º 6
0
    def _load_issue(self, mets_file):
        """Parse an issue METS file and build an Issue from its MODS record.

        Reads volume, number, edition, edition label and issue date from
        the MODS metadata referenced by the ``np:issue`` div, then looks up
        the Title by LCCN.  If that lookup fails, the ``load_titles``
        management command is run against the MARC URL built from
        ``settings.MARC_RETRIEVAL_URLFORMAT`` and the lookup is retried once.

        NOTE(review): the visible body ends after the title lookup; the
        issue is neither saved nor returned here -- confirm nothing was
        truncated from this definition.
        """
        _logger.debug("parsing issue mets file: %s", mets_file)
        doc = etree.parse(mets_file)

        # get the mods for the issue
        div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)

        # set up a new Issue
        issue = Issue()
        issue.volume = mods.xpath(
            'string(.//mods:detail[@type="volume"]/mods:number[1])',
            namespaces=ns).strip()
        issue.number = mods.xpath(
            'string(.//mods:detail[@type="issue"]/mods:number[1])',
            namespaces=ns).strip()
        issue.edition = int(
            mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:number[1])',
                namespaces=ns))
        issue.edition_label = mods.xpath(
            'string(.//mods:detail[@type="edition"]/mods:caption[1])',
            namespaces=ns).strip()

        # parse issue date
        date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
        issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

        # attach the Issue to the appropriate Title
        lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                          namespaces=ns).strip()
        try:
            title = Title.objects.get(lccn=lccn)
        except Exception:
            # Deliberately broad, as in the original: any lookup failure
            # triggers one attempt to load the title's MARC record.
            url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
            _logger.info("attempting to load marc record from %s", url)
            management.call_command('load_titles', url)
            title = Title.objects.get(lccn=lccn)
Ejemplo n.º 7
0
    def save_city(self, city):
        """Create and save an Issue for each usable record of *city*.

        Records come from ``self.retrieve_city(city)``.  A record is used
        only if it has an ``'id'`` key and truthy ``'latitud'`` and
        ``'longitud'`` values.  The issue type is chosen from the record's
        ``'desperfecto'`` value: pk 5 if it is in ``self.crash``, 4 for
        ``self.disturb``, 2 for ``self.noise_polution``, 1 for
        ``self.vandalism``, otherwise 9.
        """
        # ``record`` rather than ``object`` so the builtin is not shadowed.
        for record in self.retrieve_city(city):
            if not ('id' in record and record['latitud']
                    and record['longitud']):
                continue

            issue = Issue()
            issue.lat = float(record['latitud'])
            issue.lon = float(record['longitud'])
            # Fall back to a single space when the source value is empty.
            issue.address = record['direccion'] or ' '
            issue.description = record['desperfectoTexto'].replace(
                "Tipo de incidencia: ", '') or ' '

            # Pick the Type pk first, then do a single lookup.
            kind = record['desperfecto']
            if kind in self.crash:
                type_pk = 5
            elif kind in self.disturb:
                type_pk = 4
            elif kind in self.noise_polution:
                type_pk = 2
            elif kind in self.vandalism:
                type_pk = 1
            else:
                type_pk = 9
            issue.type = models.Type.objects.get(pk=type_pk)

            issue.save()
Ejemplo n.º 8
0
    def _load_issue(self, mets_file):
        """Parse an issue METS file, save the Issue with notes, load pages.

        Reads volume, number, edition, edition label and issue date from
        the MODS metadata referenced by the ``np:issue`` div, attaches the
        Title found by LCCN (running the ``load_titles`` management command
        and retrying once if the lookup fails) and the current batch, then
        saves the issue.  Every ``mods:note`` becomes an ``IssueNote`` on
        the issue, and each ``np:page`` div is passed to ``_load_page``.

        Returns:
            The saved Issue.

        A ``BatchLoaderException`` raised while loading a page is logged
        and does not stop the remaining pages from being processed.
        """
        _logger.debug("parsing issue mets file: %s", mets_file)
        doc = etree.parse(mets_file)

        # get the mods for the issue
        div = doc.xpath('.//mets:div[@TYPE="np:issue"]', namespaces=ns)[0]
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)

        # set up a new Issue
        issue = Issue()
        issue.volume = mods.xpath(
            'string(.//mods:detail[@type="volume"]/mods:number[1])',
            namespaces=ns).strip()
        issue.number = mods.xpath(
            'string(.//mods:detail[@type="issue"]/mods:number[1])',
            namespaces=ns).strip()
        issue.edition = int(
            mods.xpath(
                'string(.//mods:detail[@type="edition"]/mods:number[1])',
                namespaces=ns))
        issue.edition_label = mods.xpath(
            'string(.//mods:detail[@type="edition"]/mods:caption[1])',
            namespaces=ns).strip()

        # parse issue date
        date_issued = mods.xpath('string(.//mods:dateIssued)', namespaces=ns)
        issue.date_issued = datetime.strptime(date_issued, '%Y-%m-%d')

        # attach the Issue to the appropriate Title
        lccn = mods.xpath('string(.//mods:identifier[@type="lccn"])',
                          namespaces=ns).strip()
        try:
            title = Title.objects.get(lccn=lccn)
        except Exception:
            # Deliberately broad, as in the original: any lookup failure
            # triggers one attempt to load the title's MARC record.
            url = settings.MARC_RETRIEVAL_URLFORMAT % lccn
            _logger.info("attempting to load marc record from %s", url)
            management.call_command('load_titles', url)
            title = Title.objects.get(lccn=lccn)

        issue.title = title

        issue.batch = self.current_batch
        issue.save()
        _logger.debug("saved issue: %s", issue.url)

        # One IssueNote per mods:note; built without a local named ``type``
        # so the builtin is not shadowed.
        notes = [
            models.IssueNote(
                type=mods_note.xpath('string(./@type)'),
                label=mods_note.xpath('string(./@displayLabel)'),
                text=mods_note.xpath('string(.)'))
            for mods_note in mods.xpath('.//mods:note', namespaces=ns)
        ]
        issue.notes.set(notes, bulk=False)
        issue.save()

        # attach pages: lots of logging because it's expensive
        for page_div in div.xpath('.//mets:div[@TYPE="np:page"]',
                                  namespaces=ns):
            try:
                self._load_page(doc, page_div, issue)
            except BatchLoaderException as e:
                _logger.exception(e)
            else:
                # Count only pages that loaded without error.
                self.pages_processed += 1

        return issue