Exemple #1
0
def page_print(request, lccn, date, edition, sequence,
               width, height, x1, y1, x2, y2):
    """Render ``page_print.html`` for a cropped region of a page.

    (x1, y1)-(x2, y2) is the crop rectangle and width/height the requested
    output size, which is capped at the size of the crop.  The image itself
    is referenced through a URL built on ``settings.IIIF``.

    Every local variable is handed to the template through ``locals()``,
    so the local names used here are part of the template contract.
    """
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name

    # The page's own URL is reversed from the raw URL arguments, before
    # they are converted to integers below.
    path_parts = {
        'lccn': lccn, 'date': date, 'edition': edition, 'sequence': sequence,
        'width': width, 'height': height,
        'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
    }
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)

    width = int(width)
    height = int(height)
    x1 = int(x1)
    y1 = int(y1)
    x2 = int(x2)
    y2 = int(y2)

    # Never request an image larger than the cropped region.
    width = min(width, x2 - x1)
    height = min(height, y2 - y1)

    # <IIIF base>%2F<escaped batch path><escaped jp2>/<region>/<size>/0/default.jpg
    image_url = ''.join([
        settings.IIIF,
        '%2F',
        page.issue.batch.path.replace('/opt/chronam/data/dlg_batches/', '').replace('/', '%2F'),
        page.jp2_filename.replace('/', '%2F'),
        '/',
        '%d,%d,%d,%d' % (x1, y1, x2 - x1, y2 - y1),
        '/',
        '%d,%d' % (width, height),
        '/0/default.jpg',
    ])
    return render_to_response('page_print.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Exemple #2
0
def page_rdf(request, lccn, date, edition, sequence):
    """Serve the RDF/XML description of a single page.

    The page is looked up with ``get_page``, turned into a graph with
    ``page_to_graph`` and serialized relative to ``_rdf_base(request)``.
    """
    page = get_page(lccn, date, edition, sequence)
    graph = page_to_graph(page)
    rdf_xml = graph.serialize(base=_rdf_base(request), include_base=True)
    # ``content_type`` is the supported keyword; the older ``mimetype``
    # spelling is rejected by newer Django releases.
    return HttpResponse(rdf_xml, content_type='application/rdf+xml')
Exemple #3
0
def page_rdf(request, lccn, date, edition, sequence):
    """Return the RDF/XML serialization of one page, cache-tagged by LCCN.

    The page is looked up with ``get_page``, converted with
    ``page_to_graph`` and serialized relative to ``_rdf_base(request)``.
    """
    graph = page_to_graph(get_page(lccn, date, edition, sequence))
    rdf_xml = graph.serialize(base=_rdf_base(request), include_base=True)
    rdf_response = HttpResponse(rdf_xml, content_type='application/rdf+xml')
    return add_cache_tag(rdf_response, "lccn=%s" % lccn)
Exemple #4
0
def page_rdf(request, lccn, date, edition, sequence):
    """Return an ``application/rdf+xml`` response describing one page.

    ``get_page`` resolves the page, ``page_to_graph`` builds its graph, and
    the graph is serialized with ``_rdf_base(request)`` as the base.
    """
    page = get_page(lccn, date, edition, sequence)
    graph = page_to_graph(page)
    # Use ``content_type`` rather than ``mimetype``: newer Django releases
    # no longer accept the ``mimetype`` keyword.
    response = HttpResponse(graph.serialize(base=_rdf_base(request),
                                            include_base=True),
                            content_type='application/rdf+xml')
    return response
Exemple #5
0
def _fetch_html(url):
    """Fetch ``url`` and return its body parsed by ``html.fromstring``.

    The connection is closed even when reading or parsing fails.
    """
    conn = urllib.urlopen(url)
    try:
        return html.fromstring(conn.read())
    finally:
        conn.close()


def load_topic_and_categories():
    """
    Sync models.TopicCategory, models.Topic and models.TopicPages from the
    remote topics index.

    Every <li> of the index page (settings.TOPICS_ROOT_URL +
    settings.TOPICS_SUBJECT_URL) is visited in document order.  An item
    with leading text of its own is treated as a category; any other item
    is treated as a topic belonging to the most recently seen category.
    For each topic its own page is fetched to fill in the intro text,
    important dates and suggested search terms, and one TopicPages row is
    created (if missing) per entry of the last <ul> on that page.

    #TODO: some parts of the code has ugly hacks to scrub text out of
           html. This will fail if structure of target html changes.
           Revisit!
    """
    index_page = _fetch_html('%s%s' % (settings.TOPICS_ROOT_URL,
                                       settings.TOPICS_SUBJECT_URL))
    total_topics = total_categories = new_topics = new_categories = 0
    category = None
    for topic_or_category in index_page.iterdescendants('li'):
        if topic_or_category.text:
            # it's a category, check if exists / create one
            total_categories += 1
            category_name = topic_or_category.text.rstrip(':')
            category, is_new = models.TopicCategory.objects.get_or_create(
                name=category_name)
            if is_new:
                new_categories += 1
            _logger.info('Syncing category %s' % category_name)
            continue

        topic_name, start, end = prepare_topic_for_db_insert(
            topic_or_category.text_content())
        total_topics += 1
        topic, is_new = models.Topic.objects.get_or_create(
            name=topic_name, topic_start_year=start,
            topic_end_year=end, category=category)
        if is_new:
            new_topics += 1
        _logger.info('Syncing topic %s' % topic.name)

        topic_url = list(topic_or_category.iterlinks())[0][2]
        # Only relative links get the root prepended; absolute links may be
        # either http or https.
        if not topic_url.startswith(('http://', 'https://')):
            topic_url = '%s/%s' % (settings.TOPICS_ROOT_URL, topic_url)
        topic_page = _fetch_html(topic_url)

        # Collect the <ul> elements once; they are indexed three times below.
        lists = list(topic_page.iterdescendants('ul'))
        topic.intro_text = list(topic_page.iterdescendants('p'))[0].text_content().encode('utf-8')
        topic.important_dates = lists[0].text_content().encode('utf-8')
        topic.suggested_search_terms = lists[1].text_content().encode('utf-8')
        topic.save()

        for page_item in lists[-1]:
            links = list(page_item.iterlinks())
            page_url = links[0][2]
            params = page_url.split('/')
            chronam_page = None
            try:
                # Keep only the URL segments that follow 'lccn'.
                params = params[params.index('lccn') + 1:]
                chronam_page = utils.get_page(params[0], params[1],
                                              params[2][-1:], params[3][-1:])
                _logger.info('Syncing topic with page :- lccn:%s.' % params[0])
            except (ValueError, IndexError, Http404):
                # No 'lccn' segment, too few segments after it, or no such
                # page: still record the link, just without a page.
                pass

            # NOTE(review): str.lstrip() strips a *set of characters*, not a
            # prefix, so the first lstrip may remove more than the link
            # text -- confirm against the scraped HTML before changing.
            models.TopicPages.objects.get_or_create(
                page=chronam_page, topic=topic,
                query_params=params[-1], url=page_url,
                title=links[0][0].text,
                description=page_item.text_content().lstrip(
                    list(page_item.iterchildren())[0].text).lstrip('"').lstrip(','))

    _logger.info('Synced %s categories (%s new) and %s topics (%s new)' % (
        total_categories, new_categories, total_topics, new_topics))
Exemple #6
0
def chronam_topic(request, topic_id):
    """Render ``topic.html`` for one topic, with referer-based breadcrumbs.

    Breadcrumbs start from ``settings.BASE_CRUMBS``.  If the referring URL
    contains the recommended-topics URL, crumbs for that listing and for
    the topic are appended.  Otherwise the referer is parsed as a page URL
    (lccn/date/edition/sequence) and, when that page can be resolved, the
    page's crumbs plus a topic crumb are used.  Any failure while parsing
    the referer leaves the base crumbs in place.

    Raises Http404 (via ``get_object_or_404``) when the topic is unknown.
    All locals are passed to the template through ``locals()``.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)
    # Requests without a Referer header yield None; treat that as an empty
    # string so the substring test and regex below cannot raise TypeError.
    http_referer = request.META.get('HTTP_REFERER') or ''
    if urlresolvers.reverse('recommended_topics') in http_referer:
        crumbs.extend([{'label': 'Recommended Topics',
                        'href': urlresolvers.reverse('recommended_topics')},
                       {'label': topic.name,
                        'href': urlresolvers.reverse('chronam_topic',
                                              kwargs={'topic_id': topic.pk})}])
    else:
        referer = re.sub(r'^https?://', '', http_referer).split('/')
        try:
            lccn, date, edition, sequence = referer[2], referer[3], referer[4][-1], referer[5][-1]
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([{'label': topic.name,
                                'href': urlresolvers.reverse('chronam_topic',
                                              kwargs={'topic_id': topic.pk})}])
        except Exception:
            # Best effort: a referer that is not a resolvable page URL just
            # means we keep the base crumbs.  ``Exception`` (rather than a
            # bare except) lets SystemExit/KeyboardInterrupt propagate.
            pass
    # list() so the template gets a real list on every Python version.
    important_dates = list(filter(lambda s: not s.isspace(),
                                  topic.important_dates.split('\n ')))
    search_suggestions = topic.suggested_search_terms.split('\t')
    # NOTE(review): lstrip() removes a set of characters, not a prefix, so
    # it can strip more than the title -- confirm against stored data.
    chronam_pages = [{'title': t.title, 'description': t.description.lstrip(t.title),
                      'url': t.url} for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Exemple #7
0
def page_rdf(request, lccn, date, edition, sequence):
    """Serve one page as RDF/XML and tag the response with its LCCN.

    The graph from ``page_to_graph`` is serialized with
    ``_rdf_base(request)`` as base, and the response is passed through
    ``add_cache_tag`` with the tag ``lccn=<lccn>``.
    """
    page = get_page(lccn, date, edition, sequence)
    body = page_to_graph(page).serialize(base=_rdf_base(request),
                                         include_base=True)
    return add_cache_tag(
        HttpResponse(body, content_type='application/rdf+xml'),
        "lccn=%s" % lccn)
Exemple #8
0
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1,
               x2, y2):
    """Render the print view of a page region and cache-tag it by LCCN.

    The template receives every local variable through ``locals()``, so
    the local names below are part of the template contract.
    """
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # URL arguments echoed back to rebuild this view's own URL.
    path_parts = {
        'lccn': lccn, 'date': date, 'edition': edition, 'sequence': sequence,
        'width': width, 'height': height,
        'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2,
    }
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)

    return add_cache_tag(
        render_to_response('page_print.html',
                           dictionary=locals(),
                           context_instance=RequestContext(request)),
        "lccn=%s" % lccn)
Exemple #9
0
def medium(request, lccn, date, edition, sequence):
    """Serve a page image resized to 550 as a cache-tagged JPEG.

    Returns an HTTP 500 response if ``_get_resized_image`` raises IOError.
    """
    page = get_page(lccn, date, edition, sequence)
    try:
        image = _get_resized_image(page, 550)
    except IOError as err:
        return HttpResponseServerError("Unable to create thumbnail: %s" % err)
    else:
        jpeg_response = HttpResponse(content_type="image/jpeg")
        # The response object is used as the file the JPEG is written to.
        image.save(jpeg_response, "JPEG")
        return add_cache_tag(jpeg_response, "lccn=%s" % lccn)
Exemple #10
0
def thumbnail(request, lccn, date, edition, sequence):
    """Serve the thumbnail of a page as a JPEG.

    With ``settings.PREGEN_THUMBNAILS`` the image comes from
    ``_get_image(page, True)``; otherwise the page image is resized to
    ``settings.THUMBNAIL_WIDTH``.  Returns an HTTP 500 response if the
    resize raises IOError.
    """
    page = get_page(lccn, date, edition, sequence)
    if settings.PREGEN_THUMBNAILS:
        im = _get_image(page, True)
    else:
        try:
            im = _get_resized_image(page, settings.THUMBNAIL_WIDTH)
        except IOError as e:
            return HttpResponseServerError("Unable to create thumbnail: %s" % e)
    # A view must return a response: write the image into one as JPEG.
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
Exemple #11
0
def medium(request, lccn, date, edition, sequence):
    """Return the medium rendition (resized to 550) of a page as JPEG.

    An IOError while resizing is reported as an HTTP 500 response; a
    successful response is tagged with ``lccn=<lccn>`` for caching.
    """
    page = get_page(lccn, date, edition, sequence)

    try:
        resized = _get_resized_image(page, 550)
    except IOError as exc:
        message = "Unable to create thumbnail: %s" % exc
        return HttpResponseServerError(message)

    out = HttpResponse(content_type="image/jpeg")
    resized.save(out, "JPEG")
    cache_tag = "lccn=%s" % lccn
    return add_cache_tag(out, cache_tag)
Exemple #12
0
def page_image(request, lccn, date, edition, sequence, width, height):
    """Serve the full page image at the requested width and height.

    When ``settings.REDIRECT_IMAGES_TO_IIIF`` is set the client is
    redirected to the page's IIIF size URL; otherwise the request is
    delegated to ``page_image_tile`` with a crop covering the whole image.
    """
    page = get_page(lccn, date, edition, sequence)

    if not settings.REDIRECT_IMAGES_TO_IIIF:
        whole_image = (0, 0, page.jp2_width, page.jp2_length)
        return page_image_tile(request, lccn, date, edition, sequence, width,
                               height, *whole_image)

    # Redirect directly (rather than going through page_image_tile) to
    # avoid the duplicate database query.
    iiif_url = page.iiif_client.size(width=width, height=height)
    return HttpResponseRedirect(iiif_url)
Exemple #13
0
def thumbnail(request, lccn, date, edition, sequence):
    """Return a JPEG thumbnail for the requested page.

    Uses ``_get_image(page, True)`` when ``settings.PREGEN_THUMBNAILS`` is
    set, and otherwise resizes the page image to
    ``settings.THUMBNAIL_WIDTH``.  An IOError during the resize results in
    an HTTP 500 response.
    """
    page = get_page(lccn, date, edition, sequence)
    if settings.PREGEN_THUMBNAILS:
        im = _get_image(page, True)
    else:
        try:
            im = _get_resized_image(page, settings.THUMBNAIL_WIDTH)
        except IOError as e:
            return HttpResponseServerError("Unable to create thumbnail: %s" %
                                           e)
    # Without this the view would fall off the end and return None.
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
Exemple #14
0
def page_print(request, lccn, date, edition, sequence, width, height, x1, y1,
               x2, y2):
    """Render the print view for a cropped page region.

    Besides the usual page metadata the template gets a suggested download
    filename plus ``download_url``/``image_url``.  These come from the
    page's IIIF client when it has one, and otherwise from the
    ``chronam_page_image_tile`` route.

    The template context is ``locals()``, so local names are part of the
    template contract.  The response is cache-tagged with ``lccn=<lccn>``.
    """
    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    path_parts = dict(lccn=lccn, date=date, edition=edition,
                      sequence=sequence,
                      width=width, height=height,
                      x1=x1, y1=y1, x2=x2, y2=y2)
    url = urlresolvers.reverse("chronam_page_print", kwargs=path_parts)

    download_filename = "%s %s %s %s image %dx%d from %dx%d to %dx%d.jpg" % (
        lccn, date, edition, sequence, width, height, x1, y1, x2, y2)

    if not page.iiif_client:
        # No IIIF client: both URLs point at the local tile view.
        download_url = urlresolvers.reverse("chronam_page_image_tile",
                                            kwargs=path_parts)
        image_url = download_url
    else:
        # The download URL is the cropped region; the displayed image is
        # the same region at the requested size.
        download_url = page.iiif_client.region(x=x1, y=y1,
                                               width=x2 - x1,
                                               height=y2 - y1)
        image_url = download_url.size(width=width, height=height)

    return add_cache_tag(
        render_to_response("page_print.html",
                           dictionary=locals(),
                           context_instance=RequestContext(request)),
        "lccn=%s" % lccn)
Exemple #15
0
def similar_pages(page):
    """Return up to 25 pages from other titles issued the same day in the
    same cities as ``page``.

    Solr is queried for page documents with the same issue date
    (YYYYMMDD), a city clause built by ``query_join`` from the title's
    places, and a different LCCN.  Each hit's ``id`` is resolved as a URL
    and its kwargs are passed to ``utils.get_page``.
    """
    solr = SolrConnection(settings.SOLR)
    issued = page.issue.date_issued
    date = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month,
                                          issued.day)

    cities = [place.city for place in page.issue.title.places.all()]
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date, query_join(cities, 'city'), page.issue.title.lccn)
    hits = solr.query(query, rows=25).results
    # Resolve every hit first, then load the pages, in that order.
    page_kwargs = [urlresolvers.resolve(hit['id']).kwargs for hit in hits]
    return [utils.get_page(**kwargs) for kwargs in page_kwargs]
Exemple #16
0
def page_image_tile(request, lccn, date, edition, sequence, width, height,
                    x1, y1, x2, y2):
    """Serve the (x1, y1)-(x2, y2) region of a page image as a JPEG.

    The region is scaled to width x height, each capped at the size of the
    crop.  A truthy ``download`` query parameter switches the content type
    to ``binary/octet-stream``.  Returns an HTTP 500 response if the page
    image cannot be loaded.
    """
    page = get_page(lccn, date, edition, sequence)
    # ``content_type`` is the supported keyword; newer Django releases no
    # longer accept ``mimetype``.
    if "download" in request.GET and request.GET["download"]:
        response = HttpResponse(content_type="binary/octet-stream")
    else:
        response = HttpResponse(content_type="image/jpeg")

    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    try:
        im = _get_image(page)
    except IOError as e:
        return HttpResponseServerError("Unable to create image tile: %s" % e)

    # Never scale the tile beyond the size of the cropped region.
    width = min(width, x2 - x1)
    height = min(height, y2 - y1)

    tile = im.crop((x1, y1, x2, y2)).resize((width, height))
    tile.save(response, "JPEG")
    return response
Exemple #17
0
def similar_pages(page):
    """Return up to 25 pages from other titles issued the same day in the
    same cities as ``page``.

    Returns None when the title has no places or its first place has no
    city, because no city clause can be built.  Otherwise Solr is queried
    for page documents with the same issue date (YYYYMMDD), the title's
    cities and a different LCCN.  Each hit's ``id`` is resolved as a URL
    and loaded with ``utils.get_page``; the result is a list.
    """
    # Fetch the places once; an empty list must not raise IndexError.
    places = list(page.issue.title.places.all())
    if not places or not places[0].city:
        return None

    issued = page.issue.date_issued
    date = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month,
                                          issued.day)
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date,
        query_join([place.city for place in places], 'city'),
        page.issue.title.lccn)

    solr = SolrConnection(settings.SOLR)
    results = solr.query(query, rows=25).results
    return [utils.get_page(**urlresolvers.resolve(r['id']).kwargs)
            for r in results]
Exemple #18
0
def thumbnail(request, lccn, date, edition, sequence):
    """Serve a page thumbnail, by IIIF redirect or as a resized JPEG.

    With ``settings.REDIRECT_IMAGES_TO_IIIF`` the client is redirected to
    ``page.thumb_url``.  Otherwise the page image is resized to
    ``settings.THUMBNAIL_WIDTH``; an IOError while resizing yields an
    HTTP 500 response.
    """
    page = get_page(lccn, date, edition, sequence)

    if settings.REDIRECT_IMAGES_TO_IIIF:
        return HttpResponseRedirect(page.thumb_url)

    try:
        thumb = _get_resized_image(page, settings.THUMBNAIL_WIDTH)
    except IOError as err:
        return HttpResponseServerError("Unable to create thumbnail: %s" %
                                       err)

    jpeg_response = HttpResponse(content_type="image/jpeg")
    thumb.save(jpeg_response, "JPEG")
    return jpeg_response
Exemple #19
0
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1,
                    y1, x2, y2):
    """Serve a cropped, scaled tile of a page image as a JPEG.

    (x1, y1)-(x2, y2) is the crop rectangle; width and height are the
    requested output size, capped at the size of the crop.  A truthy
    ``download`` query parameter makes the response
    ``binary/octet-stream`` instead of ``image/jpeg``.  Returns an HTTP 500
    response if the page image cannot be loaded.
    """
    page = get_page(lccn, date, edition, sequence)
    if 'download' in request.GET and request.GET['download']:
        response = HttpResponse(content_type="binary/octet-stream")
    else:
        response = HttpResponse(content_type="image/jpeg")

    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    try:
        im = _get_image(page)
    except IOError as e:
        return HttpResponseServerError("Unable to create image tile: %s" % e)

    # Never scale the tile beyond the size of the cropped region.
    width = min(width, x2 - x1)
    height = min(height, y2 - y1)

    cropped = im.crop((x1, y1, x2, y2))
    cropped.resize((width, height)).save(response, "JPEG")
    return response
Exemple #20
0
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1,
                    y1, x2, y2):
    """Serve an image tile, by IIIF redirect or rendered locally.

    All size and coordinate arguments are converted to integers.  With
    ``settings.REDIRECT_IMAGES_TO_IIIF`` the request is handed to
    ``redirect_to_iiif``; otherwise the tile is produced by
    ``serve_image_tile``.  An EnvironmentError while loading or serving
    the image is logged and reported as an HTTP 500 response.
    """
    page = get_page(lccn, date, edition, sequence)
    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

    if settings.REDIRECT_IMAGES_TO_IIIF:
        return redirect_to_iiif(request, page.iiif_client, width, height, x1,
                                y1, x2, y2)

    try:
        image = _get_image(page)
        return serve_image_tile(request, image, width, height,
                                x1, y1, x2, y2)
    except EnvironmentError as err:
        logging.exception("Unable to create image tile for %s", page)
        return HttpResponseServerError("Unable to create image tile: %s" %
                                       err)
Exemple #21
0
def page_print(request, lccn, date, edition, sequence,
               width, height, x1, y1, x2, y2):
    """Render ``page_print.html`` for the requested page region.

    The template context is ``locals()``: the URL arguments, the page,
    title and issue, the page title, breadcrumbs, host, image credit and
    this view's own reversed URL.  Local names are therefore part of the
    template contract.
    """
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % (label(title), label(issue), label(page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = page.issue.batch.awardee.name
    # Keyword arguments needed to rebuild this view's URL.
    path_parts = {
        'lccn': lccn,
        'date': date,
        'edition': edition,
        'sequence': sequence,
        'width': width,
        'height': height,
        'x1': x1,
        'y1': y1,
        'x2': x2,
        'y2': y2,
    }
    url = urlresolvers.reverse('chronam_page_print', kwargs=path_parts)

    return render_to_response('page_print.html',
                              context_instance=RequestContext(request),
                              dictionary=locals())
Exemple #22
0
def page_image_tile(request, lccn, date, edition, sequence, width, height, x1,
                    y1, x2, y2):
    """Serve the (x1, y1)-(x2, y2) crop of a page image as a JPEG.

    The crop is resized to width x height, each capped at the crop's own
    size.  A truthy ``download`` query parameter switches the content type
    to ``binary/octet-stream``.  Returns an HTTP 500 response if the page
    image cannot be loaded; otherwise the response is cache-tagged with
    ``lccn=<lccn>``.
    """
    page = get_page(lccn, date, edition, sequence)
    wants_download = 'download' in request.GET and request.GET['download']
    response = HttpResponse(
        content_type="binary/octet-stream" if wants_download else "image/jpeg")

    width = int(width)
    height = int(height)
    x1 = int(x1)
    y1 = int(y1)
    x2 = int(x2)
    y2 = int(y2)

    try:
        source = _get_image(page)
    except IOError as err:
        return HttpResponseServerError("Unable to create image tile: %s" % err)

    # Cap the output size at the size of the cropped region.
    target_size = (min(width, x2 - x1), min(height, y2 - y1))

    source.crop((x1, y1, x2, y2)).resize(target_size).save(response, "JPEG")
    return add_cache_tag(response, "lccn=%s" % lccn)
Exemple #23
0
def page_image_tile(request, lccn, date, edition, sequence,
                    width, height, x1, y1, x2, y2):
    """Return a JPEG tile cropped from a page image and scaled to fit.

    ``x1, y1, x2, y2`` give the crop box; ``width`` and ``height`` are the
    requested output size and are never allowed to exceed the crop box.
    If ``download`` is present and truthy in the query string the response
    is sent as ``binary/octet-stream`` rather than ``image/jpeg``.  An
    IOError while loading the image yields an HTTP 500 response.
    """
    page = get_page(lccn, date, edition, sequence)
    if not ('download' in request.GET and request.GET['download']):
        response = HttpResponse(content_type="image/jpeg")
    else:
        response = HttpResponse(content_type="binary/octet-stream")

    width, height = int(width), int(height)
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

    try:
        full_image = _get_image(page)
    except IOError as exc:
        return HttpResponseServerError("Unable to create image tile: %s" % exc)

    crop_box = (x1, y1, x2, y2)
    # Clamp the requested size to the dimensions of the crop box.
    width = min(width, x2 - x1)
    height = min(height, y2 - y1)

    tile = full_image.crop(crop_box).resize((width, height))
    tile.save(response, "JPEG")
    return add_cache_tag(response, "lccn=%s" % lccn)
Exemple #24
0
def page_image(request, lccn, date, edition, sequence, width, height):
    """Serve the whole page image scaled to width x height.

    Delegates to ``page_image_tile`` with a crop running from (0, 0) to
    (page.jp2_width, page.jp2_length).
    """
    page = get_page(lccn, date, edition, sequence)
    full_extent = (0, 0, page.jp2_width, page.jp2_length)
    return page_image_tile(request, lccn, date, edition, sequence, width,
                           height, *full_extent)
Exemple #25
0
def page_image(request, lccn, date, edition, sequence, width, height):
    """Return the complete page image at the requested size.

    The work is done by ``page_image_tile``, called with a crop box whose
    origin is (0, 0) and whose far corner is the page's
    ``jp2_width`` x ``jp2_length``.
    """
    page = get_page(lccn, date, edition, sequence)
    right, bottom = page.jp2_width, page.jp2_length
    return page_image_tile(request, lccn, date, edition, sequence,
                           width, height, 0, 0, right, bottom)
Exemple #26
0
def medium(request, lccn, date, edition, sequence):
    """Serve a page image resized to 550 as a JPEG.

    Returns an HTTP 500 response if ``_get_resized_image`` raises IOError.
    """
    page = get_page(lccn, date, edition, sequence)
    try:
        im = _get_resized_image(page, 550)
    except IOError as e:
        return HttpResponseServerError("Unable to create thumbnail: %s" % e)
    # A view must return a response: write the resized image out as JPEG.
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
Exemple #27
0
def medium(request, lccn, date, edition, sequence):
    """Return the medium rendition (resized to 550) of a page as a JPEG.

    An IOError raised while resizing is reported as an HTTP 500 response.
    """
    page = get_page(lccn, date, edition, sequence)
    try:
        im = _get_resized_image(page, 550)
    except IOError as e:
        return HttpResponseServerError("Unable to create thumbnail: %s" % e)
    # Without this the view would fall off the end and return None.
    response = HttpResponse(content_type="image/jpeg")
    im.save(response, "JPEG")
    return response
Exemple #28
0
def _fetch_html(url):
    """Fetch ``url`` and return its body parsed by ``html.fromstring``.

    The connection is closed even when reading or parsing fails.
    """
    conn = urllib.urlopen(url)
    try:
        return html.fromstring(conn.read())
    finally:
        conn.close()


def load_topic_and_categories():
    """
    Sync models.TopicCategory, models.Topic and models.TopicPages from the
    remote topics index.

    Every <li> of the index page (settings.TOPICS_ROOT_URL +
    settings.TOPICS_SUBJECT_URL) is visited in document order.  An item
    with leading text of its own is treated as a category; any other item
    is treated as a topic belonging to the most recently seen category.
    For each topic its own page is fetched to fill in the intro text,
    important dates and suggested search terms, and one TopicPages row is
    created (if missing) per entry of the last <ul> on that page.

    #TODO: some parts of the code has ugly hacks to scrub text out of
           html. This will fail if structure of target html changes.
           Revisit!
    """
    index_page = _fetch_html(
        '%s%s' % (settings.TOPICS_ROOT_URL, settings.TOPICS_SUBJECT_URL))
    total_topics = total_categories = new_topics = new_categories = 0
    category = None
    for topic_or_category in index_page.iterdescendants('li'):
        if topic_or_category.text:
            # it's a category, check if exists / create one
            total_categories += 1
            category_name = topic_or_category.text.rstrip(':')
            category, is_new = models.TopicCategory.objects.get_or_create(
                name=category_name)
            if is_new:
                new_categories += 1
            _logger.info('Syncing category %s' % category_name)
            continue

        topic_name, start, end = prepare_topic_for_db_insert(
            topic_or_category.text_content())
        total_topics += 1
        topic, is_new = models.Topic.objects.get_or_create(
            name=topic_name,
            topic_start_year=start,
            topic_end_year=end,
            category=category)
        if is_new:
            new_topics += 1
        _logger.info('Syncing topic %s' % topic.name)

        topic_url = list(topic_or_category.iterlinks())[0][2]
        # Only relative links get the root prepended; absolute links may be
        # either http or https.
        if not topic_url.startswith(('http://', 'https://')):
            topic_url = '%s/%s' % (settings.TOPICS_ROOT_URL, topic_url)
        topic_page = _fetch_html(topic_url)

        # Collect the <ul> elements once; they are indexed three times below.
        lists = list(topic_page.iterdescendants('ul'))
        topic.intro_text = list(topic_page.iterdescendants(
            'p'))[0].text_content().encode('utf-8')
        topic.important_dates = lists[0].text_content().encode('utf-8')
        topic.suggested_search_terms = lists[1].text_content().encode(
            'utf-8')
        topic.save()

        for page_item in lists[-1]:
            links = list(page_item.iterlinks())
            page_url = links[0][2]
            params = page_url.split('/')
            chronam_page = None
            try:
                # Keep only the URL segments that follow 'lccn'.
                params = params[params.index('lccn') + 1:]
                chronam_page = utils.get_page(params[0], params[1],
                                              params[2][-1:],
                                              params[3][-1:])
                _logger.info('Syncing topic with page :- lccn:%s.' %
                             params[0])
            except (ValueError, IndexError, Http404):
                # No 'lccn' segment, too few segments after it, or no such
                # page: still record the link, just without a page.
                pass

            # NOTE(review): str.lstrip() strips a *set of characters*, not a
            # prefix, so the first lstrip may remove more than the link
            # text -- confirm against the scraped HTML before changing.
            models.TopicPages.objects.get_or_create(
                page=chronam_page,
                topic=topic,
                query_params=params[-1],
                url=page_url,
                title=links[0][0].text,
                description=page_item.text_content().lstrip(
                    list(page_item.iterchildren())[0].text).lstrip('"').lstrip(
                        ','))

    _logger.info('Synced %s categories (%s new) and %s topics (%s new)' % (
        total_categories, new_categories, total_topics, new_topics))