Example #1
0
def _strip_title_prefix(description, title):
    """Return ``description`` without a leading copy of ``title``.

    ``str.lstrip`` treats its argument as a *set of characters*, not as a
    prefix, so it can remove far more than the title; this helper removes
    the title only when the description literally starts with it.
    """
    if title and description.startswith(title):
        return description[len(title):]
    return description


def openoni_topic(request, topic_id):
    """Render ``topic.html`` for the topic with primary key ``topic_id``.

    Breadcrumbs depend on where the visitor came from (``HTTP_REFERER``):

    * from the recommended-topics listing: base crumbs + listing + topic;
    * from a URL that can be resolved to a page: that page's crumbs + topic;
    * anything else (including no referer at all): the base crumbs only.

    Raises ``Http404`` (via ``get_object_or_404``) when the topic is missing.

    NOTE: the template context is ``locals()``, so the local variable names
    in this function are part of the template contract; do not rename them
    or introduce new ones casually.
    """
    topic = get_object_or_404(models.Topic, pk=topic_id)
    page_title = topic.name
    crumbs = list(settings.BASE_CRUMBS)
    # HTTP_REFERER is an optional header; default to '' so that a direct
    # visit does not raise TypeError on the ``in`` test / ``re.sub`` below.
    if urlresolvers.reverse('recommended_topics') in request.META.get('HTTP_REFERER', ''):
        crumbs.extend([{'label': 'Recommended Topics',
                        'href': urlresolvers.reverse('recommended_topics')},
                       {'label': topic.name,
                        'href': urlresolvers.reverse('openoni_topic',
                                                     kwargs={'topic_id': topic.pk})}])
    else:
        # Drop the scheme and split the rest into host / path segments.
        referer = re.sub(r'^https?://', '', request.META.get('HTTP_REFERER', '')).split('/')
        try:
            # Only the last character of the edition and sequence segments
            # is used (presumably segments look like 'ed-1' / 'seq-1' --
            # TODO confirm against the URL configuration).
            lccn, date, edition, sequence = referer[2], referer[3], referer[4][-1], referer[5][-1]
            page = get_page(lccn, date, edition, sequence)
            if page:
                title, issue, page = _get_tip(lccn, date, edition, sequence)
                crumbs = create_crumbs(title, issue, date, edition, page)
                crumbs.extend([{'label': topic.name,
                                'href': urlresolvers.reverse('openoni_topic',
                                                             kwargs={'topic_id': topic.pk})}])
        except Exception:
            # Best effort: a referer that does not point at a page (too few
            # segments, unknown page, ...) simply leaves the crumbs as they
            # are. ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass
    important_dates = filter(lambda s: not s.isspace(), topic.important_dates.split('\n '))
    search_suggestions = topic.suggested_search_terms.split('\t')
    openoni_pages = [{'title': t.title,
                      'description': _strip_title_prefix(t.description, t.title),
                      'url': t.url} for t in topic.topicpages_set.all()]
    return render_to_response('topic.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Example #2
0
def page_rdf(request, lccn, date, edition, sequence):
    """Return the page's graph serialized as an ``application/rdf+xml`` response.

    The page is looked up with ``get_page(lccn, date, edition, sequence)``,
    converted with ``page_to_graph`` and serialized relative to the base
    computed by ``_rdf_base(request)``.
    """
    rdf_graph = page_to_graph(get_page(lccn, date, edition, sequence))
    body = rdf_graph.serialize(base=_rdf_base(request), include_base=True)
    return HttpResponse(body, content_type='application/rdf+xml')
Example #3
0
def similar_pages(page):
    """Return pages Solr reports for the same date and cities as ``page``.

    The Solr query matches documents of type ``page`` whose date equals the
    issue date of ``page`` and whose city matches one of the places of the
    page's title, excluding the page's own title (by lccn). At most 25
    results are requested; each result id is resolved as a URL and its
    keyword arguments are passed to ``utils.get_page``.
    """
    solr = SolrConnection(settings.SOLR)
    issue = page.issue
    issued = issue.date_issued
    # Zero-padded year, month and day concatenated without separators.
    date = '{0:02d}{1:02d}{2:02d}'.format(issued.year, issued.month, issued.day)

    cities = [place.city for place in issue.title.places.all()]
    query = '+type:page AND date:%s AND %s AND NOT(lccn:%s)' % (
        date, query_join(cities, 'city'), issue.title.lccn)

    hits = solr.query(query, rows=25).results
    # Resolve every hit first, then load the pages, in result order.
    page_kwargs = [urlresolvers.resolve(hit['id']).kwargs for hit in hits]
    return [utils.get_page(**kwargs) for kwargs in page_kwargs]
Example #4
0
def page_print(request, lccn, date, edition, sequence,
               width, height, x1, y1, x2, y2):
    """Render ``page_print.html`` for one page and the given region values.

    The template context is ``locals()``: every argument and every local
    variable assigned below is visible to the template under its own name,
    so these names must stay stable.
    """
    page = get_page(lccn, date, edition, sequence)
    title = get_object_or_404(models.Title, lccn=lccn)
    issue = page.issue
    page_title = "%s, %s, %s" % tuple(label(obj) for obj in (title, issue, page))
    crumbs = create_crumbs(title, issue, date, edition, page)
    host = request.get_host()
    image_credit = issue.batch.awardee.name
    # Keyword arguments used to rebuild the URL of this print view.
    path_parts = {
        'lccn': lccn,
        'date': date,
        'edition': edition,
        'sequence': sequence,
        'width': width,
        'height': height,
        'x1': x1,
        'y1': y1,
        'x2': x2,
        'y2': y2,
    }
    url = urlresolvers.reverse('openoni_page_print', kwargs=path_parts)

    return render_to_response('page_print.html', dictionary=locals(),
                              context_instance=RequestContext(request))
Example #5
0
def _fetch_html(url):
    """Download ``url`` and return its body parsed with ``html.fromstring``."""
    from contextlib import closing

    # closing() releases the connection even if read() or parsing fails.
    with closing(urllib.urlopen(url)) as response:
        return html.fromstring(response.read())


def load_topic_and_categories():
    """
    Sync topic categories, topics and topic pages from the topics listing.

    Fetches the listing at settings.TOPICS_ROOT_URL +
    settings.TOPICS_SUBJECT_URL and walks every <li> in it. An <li> with
    direct text is treated as a category; any other <li> is treated as a
    topic belonging to the most recently seen category. For each topic the
    linked topic page is fetched to fill in the intro text, important dates
    and suggested search terms, and one models.TopicPages row is created
    per entry of the last <ul> on that page. Existing rows are reused
    (get_or_create), so the function can be re-run.

    #TODO: some parts of the code have ugly hacks to scrub text out of
           html. This will fail if the structure of the target html
           changes. Revisit!
    """
    listing = _fetch_html("%s%s" % (settings.TOPICS_ROOT_URL, settings.TOPICS_SUBJECT_URL))
    total_topics = total_categories = new_topics = new_categories = 0
    category = None
    for topic_or_category in listing.iterdescendants("li"):
        if topic_or_category.text:
            # It's a category: check if it exists / create one.
            total_categories += 1
            category_name = topic_or_category.text.rstrip(":")
            category, is_new = models.TopicCategory.objects.get_or_create(name=category_name)
            if is_new:
                new_categories += 1
            _logger.info("Syncing category %s", category_name)
            continue

        # Otherwise it's a topic filed under the current category.
        topic_name, start, end = prepare_topic_for_db_insert(topic_or_category.text_content())
        total_topics += 1
        topic, is_new = models.Topic.objects.get_or_create(
            name=topic_name, topic_start_year=start, topic_end_year=end, category=category
        )
        if is_new:
            new_topics += 1
        _logger.info("Syncing topic %s", topic.name)

        # iterlinks() yields (element, attribute, link, pos); index 2 is the URL.
        topic_url = list(topic_or_category.iterlinks())[0][2]
        # Relative links are resolved against the topics root; both http and
        # https links count as already absolute.
        if not topic_url.startswith(("http://", "https://")):
            topic_url = "%s/%s" % (settings.TOPICS_ROOT_URL, topic_url)
        topic_page = _fetch_html(topic_url)

        # Positional scraping: first <p> is the intro, first <ul> the dates,
        # second <ul> the search terms, last <ul> the list of pages.
        lists = list(topic_page.iterdescendants("ul"))
        topic.intro_text = list(topic_page.iterdescendants("p"))[0].text_content().encode("utf-8")
        topic.important_dates = lists[0].text_content().encode("utf-8")
        topic.suggested_search_terms = lists[1].text_content().encode("utf-8")
        topic.save()

        for item in lists[-1]:
            first_link = list(item.iterlinks())[0]
            link_element, page_url = first_link[0], first_link[2]
            url_parts = page_url.split("/")
            openoni_page = None
            try:
                # Segments after "lccn": lccn, date, edition, sequence. Only
                # the last character of the edition / sequence segments is used.
                page_parts = url_parts[url_parts.index("lccn") + 1:]
                openoni_page = utils.get_page(
                    page_parts[0], page_parts[1], page_parts[2][-1:], page_parts[3][-1:]
                )
                _logger.info("Syncing topic with page :- lccn:%s.", page_parts[0])
            except (ValueError, IndexError, Http404):
                # No "lccn" segment, too few segments after it, or no such
                # page: record the link without an associated page.
                pass

            models.TopicPages.objects.get_or_create(
                page=openoni_page,
                topic=topic,
                # Last path segment of the link.
                query_params=url_parts[-1],
                url=page_url,
                title=link_element.text,
                # NOTE(review): lstrip() strips a *set of characters*, not a
                # prefix, so this may remove more than the title. It is left
                # untouched because description is part of the get_or_create
                # lookup: changing it would duplicate rows on re-sync.
                description=item.text_content().lstrip(list(item.iterchildren())[0].text).lstrip('"').lstrip(","),
            )

    _logger.info(
        "Synced %d categories (%d new) and %d topics (%d new)",
        total_categories, new_categories, total_topics, new_topics,
    )