Python Page 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: chronam.core.models

클래스/타입: Page

hotexamples.com에서의 예제들: 5

Python Page - 5개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 chronam.core.models.Page의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Page(2)

sequence(2)

issue(1)

jp2_filename(1)

jp2_length(1)

jp2_width(1)

lookup(1)

notes(1)

number(1)

ocr_filename(1)

pdf_filename(1)

reel(1)

save(1)

section_label(1)

tiff_filename(1)

예제 #1

파일 보기

파일: batch_loader.py 프로젝트: kdnp/chronam

    def _load_page(self, doc, div, issue):
        """Begin building a Page from a page div and its MODS metadata.

        The div's ``DMDID`` attribute is resolved to a metadata record via
        ``dmd_mods`` and the page sequence number is parsed from it.

        Raises:
            BatchLoaderException: if the sequence value found in the
                metadata cannot be converted to an integer.
        """
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)
        page = Page()

        seq_string = mods.xpath(
            'string(.//mods:extent/mods:start)', namespaces=ns)
        try:
            page.sequence = int(seq_string)
        except ValueError:
            # The caught exception object is not needed; only the offending
            # string is reported. (The comma form of "except" is Python 2-only.)
            raise BatchLoaderException("could not determine sequence number for page from '%s'" % seq_string)

예제 #2

파일 보기

파일: batch_loader.py 프로젝트: rugby110/chronam

    def _load_page(self, doc, div, issue):
        """Create a Page and populate its sequence number from metadata.

        Resolves the ``DMDID`` attribute of ``div`` through ``dmd_mods`` and
        reads the sequence number from the resulting record.

        Raises:
            BatchLoaderException: when the sequence string is not a valid
                integer.
        """
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)
        page = Page()

        seq_string = mods.xpath(
            'string(.//mods:extent/mods:start)', namespaces=ns)
        try:
            page.sequence = int(seq_string)
        except ValueError:
            # No need to bind the exception: the message is built from the
            # raw string. This spelling is valid on Python 2.6+ and Python 3.
            raise BatchLoaderException("could not determine sequence number for page from '%s'" % seq_string)

예제 #3

파일 보기

파일: flickr.py 프로젝트: sshyran/chronam

    def handle(self, key, **options):
        """Link flickr URLs to the chronam pages they point at.

        For every (flickr url, chronam url) pair yielded by
        ``flickr_chronam_links(key)``, the chronam url's path is used to look
        up a Page; a FlickrUrl is then fetched or created for that page.
        Progress is written to stdout, missing pages to stderr.
        """
        LOGGER.debug("looking for chronam page content on flickr")
        create_count = 0

        for flickr_url, chronam_url in flickr_chronam_links(key):
            link_message = "found flickr/chronam link: %s, %s" % (
                flickr_url, chronam_url)
            self.stdout.write(link_message)

            # the path component of the page url identifies the Page model
            page = Page.lookup(urlparse(chronam_url).path)
            if not page:
                self.stderr.write("page for %s not found" % chronam_url)
                continue

            # fetch or create the FlickrUrl attached to the appropriate page
            flickr_link, is_new = FlickrUrl.objects.get_or_create(
                value=flickr_url, page=page)
            if not is_new:
                self.stdout.write("already knew about %s" % flickr_url)
                continue

            create_count += 1
            flickr_link.save()
            update_message = "updated page (%s) with flickr url (%s)" % (
                page, flickr_url)
            self.stdout.write(update_message)

        self.stdout.write("created %s flickr urls" % create_count)

예제 #4

파일 보기

파일: flickr.py 프로젝트: LibraryOfCongress/chronam

    def handle(self, key, **options):
        """Record flickr URLs against their matching chronam pages.

        Walks the flickr/chronam url pairs from ``flickr_chronam_links(key)``,
        resolves each chronam url to a Page by its path, and gets or creates
        the corresponding FlickrUrl. A running count of newly created rows is
        reported at the end.
        """
        LOGGER.debug("looking for chronam page content on flickr")
        create_count = 0

        for flickr_url, chronam_url in flickr_chronam_links(key):
            self.stdout.write(
                "found flickr/chronam link: %s, %s" % (flickr_url, chronam_url)
            )

            # locate the Page model from the path part of the page url
            page_path = urlparse(chronam_url).path
            page = Page.lookup(page_path)
            if not page:
                self.stderr.write("page for %s not found" % chronam_url)
                continue

            # get or create the FlickrUrl tied to that page
            flickr_record, was_created = FlickrUrl.objects.get_or_create(
                value=flickr_url,
                page=page,
            )
            if not was_created:
                self.stdout.write("already knew about %s" % flickr_url)
                continue

            create_count += 1
            flickr_record.save()
            self.stdout.write(
                "updated page (%s) with flickr url (%s)" % (page, flickr_url)
            )

        self.stdout.write("created %s flickr urls" % create_count)

예제 #5

파일 보기

    def _load_page(self, doc, div, issue):
        """Build, populate and save a Page from a page div of a METS document.

        Args:
            doc: parsed XML document; queried with xpath and used (via
                ``doc.docinfo.URL``) as the base for resolving file names.
            div: the page's div element; supplies the ``DMDID`` attribute
                and the ``mets:fptr`` children.
            issue: the issue the page belongs to; assigned to ``page.issue``.

        Returns:
            The saved Page (the object returned by ``self.process_ocr`` when
            OCR processing ran).

        Raises:
            BatchLoaderException: if the sequence number is not an integer,
                or if a service (jp2) file ends up without a width or length.
        """
        dmdid = div.attrib['DMDID']
        mods = dmd_mods(doc, dmdid)
        page = Page()

        seq_string = mods.xpath(
            'string(.//mods:extent/mods:start)', namespaces=ns)
        try:
            page.sequence = int(seq_string)
        except ValueError:
            raise BatchLoaderException("could not determine sequence number for page from '%s'" % seq_string)
        page.number = mods.xpath(
            'string(.//mods:detail[@type="page number"])',
            namespaces=ns
        ).strip()

        reel_number = mods.xpath(
            'string(.//mods:identifier[@type="reel number"])',
            namespaces=ns
        ).strip()
        try:
            reel = models.Reel.objects.get(number=reel_number,
                                           batch=self.current_batch)
            page.reel = reel
        except models.Reel.DoesNotExist:
            if reel_number:
                # the reel is not known for this batch yet: create it,
                # flagged as implicit
                reel = models.Reel(number=reel_number,
                                   batch=self.current_batch,
                                   implicit=True)
                reel.save()
                page.reel = reel
            else:
                # Logger.warn is a deprecated alias of Logger.warning
                LOGGER.warning("unable to find reel number in page metadata")

        LOGGER.info("Assigned page sequence: %s", page.sequence)

        # the section label, if any, lives in the metadata of the enclosing
        # np:section div rather than in the page's own metadata
        _section_dmdid = div.xpath(
            'string(ancestor::mets:div[@TYPE="np:section"]/@DMDID)',
            namespaces=ns)
        if _section_dmdid:
            section_mods = dmd_mods(doc, _section_dmdid)
            section_label = section_mods.xpath(
                'string(.//mods:detail[@type="section label"]/mods:number[1])',
                namespaces=ns).strip()
            if section_label:
                page.section_label = section_label

        page.issue = issue

        LOGGER.info("Saving page. issue date: %s, page sequence: %s", issue.date_issued, page.sequence)

        # TODO - consider the possibility of executing the file name
        #        assignments (below) before this page.save().
        page.save()

        # notes are attached after the first save
        # NOTE(review): presumably page.notes is a related-object set that
        # needs a saved page — verify against the Page model.
        notes = []
        for mods_note in mods.xpath('.//mods:note', namespaces=ns):
            # named note_type so the builtin ``type`` is not shadowed
            note_type = mods_note.xpath('string(./@type)')
            label = mods_note.xpath('string(./@displayLabel)')
            text = mods_note.xpath('string(.)').strip()
            note = models.PageNote(type=note_type, label=label, text=text)
            notes.append(note)
        page.notes = notes

        # there's a level indirection between the METS structmap and the
        # details about specific files in this package ...
        # so we have to first get the FILEID from the issue div in the
        # structmap and then use it to look up the file details in the
        # larger document.

        for fptr in div.xpath('./mets:fptr', namespaces=ns):
            file_id = fptr.attrib['FILEID']
            file_el = doc.xpath('.//mets:file[@ID="%s"]' % file_id,
                                namespaces=ns)[0]
            file_type = file_el.attrib['USE']

            # get the filename relative to the storage location
            file_name = file_el.xpath('string(./mets:FLocat/@xlink:href)',
                                      namespaces=ns)
            file_name = urlparse.urljoin(doc.docinfo.URL, file_name)
            file_name = self.storage_relative_path(file_name)

            if file_type == 'master':
                page.tiff_filename = file_name
            elif file_type == 'service':
                page.jp2_filename = file_name
                try:
                    # extract image dimensions from technical metadata for jp2;
                    # the first ADMID that yields both values wins
                    for admid in file_el.attrib['ADMID'].split(' '):
                        length, width = get_dimensions(doc, admid)
                        if length and width:
                            page.jp2_width = width
                            page.jp2_length = length
                            break
                except KeyError:
                    LOGGER.info("Could not determine dimensions of jp2 for issue: %s page: %s... trying harder...", page.issue, page)

                # a jp2 without usable dimensions is a hard failure
                if not page.jp2_width:
                    raise BatchLoaderException("No jp2 width for issue: %s page: %s" % (page.issue, page))
                if not page.jp2_length:
                    raise BatchLoaderException("No jp2 length for issue: %s page: %s" % (page.issue, page))
            elif file_type == 'derivative':
                page.pdf_filename = file_name
            elif file_type == 'ocr':
                page.ocr_filename = file_name

        if page.ocr_filename:
            # don't incur overhead of extracting ocr text, word coordinates
            # and indexing unless the batch loader has been set up to do it
            if self.PROCESS_OCR:
                page = self.process_ocr(page)
        else:
            LOGGER.info("No ocr filename for issue: %s page: %s", page.issue, page)

        # second save persists the file names and jp2 dimensions set above
        LOGGER.debug("saving page: %s", page.url)
        page.save()
        return page