Example #1
0
def index(request):
    """Render the search page and, when a query is supplied, its results.

    Reads from ``request.GET``:
      * ``q`` -- the search query. When absent, only the country form is
        rendered and no search is run.
      * ``p`` -- optional page number. Missing, non-numeric or smaller than
        1 values fall back to page 1.

    The dict returned by ``ESController.search`` is merged into the template
    context; it must contain ``result_count``, which is fed to ``pager``.
    """
    k = 50  # presumably the page size; passed to both es.search and pager
    context = {}

    # By default every known country is ticked.
    boxes = CountryCheckboxes(
        initial={'country': [cty[0] for cty in get_country_list()]})

    if 'q' in request.GET:
        q = request.GET['q'].encode('utf-8')

        # Clamp from below: the previous min(1, ...) capped the page at 1,
        # which made every page after the first unreachable.
        try:
            p = max(1, int(request.GET.get('p', 1)))
        except (TypeError, ValueError):
            p = 1

        es = ESController()
        c = get_selected_country_list(request)
        context.update(es.search(q, c, k, p))
        context.update({
            'linklist': pager(k, int(context['result_count']), p, 10),
            'last_search': q,
            'last_search_url': str(reverse('search.views.index'))
            + '?q=' + str(urllib.quote(q)),
            'page': p + 1})
        if c:
            # An explicit country selection was made: flag it and reflect
            # that selection in the checkboxes instead of "all countries".
            context.update({'custom': True})
            boxes = CountryCheckboxes(initial={'country': c})

    context.update({'form': boxes})

    return render(request, 'search/index.html', context)
Example #2
0
def sites(request):
    """Render the overview of all sites with per-site document counts.

    Template context:
      * ``sites`` -- one dict per ``Site`` (``model_to_dict`` output) extended
        with ``doc_count``, ``zero_count`` (documents with ``isUsed=0``) and
        ``index_count`` (taken from ``ESController``; 0 when unavailable).
      * ``score`` -- normally a dict mapping each configured owner to
        ``{'doc': ..., 'site': ...}`` totals. When no index counts could be
        obtained it is instead a queryset of ``site__owner``/``zero_count``
        rows built from the unused documents.
    """
    es = ESController()

    # Best effort: the page must still render when the index counts cannot
    # be fetched, so any failure degrades to "no counts".
    try:
        site_doc_count = es.get_document_count_by_site()
    except Exception:
        site_doc_count = {}

    owners = config_file.get_config().get('bblio', 'owners').split(';')
    scoreboard = {}
    for o in owners:
        if o != '':
            scoreboard[o] = {'doc': 0, 'site': 0}

    site_list = []
    for site in Site.objects.all():
        s = model_to_dict(site)
        site_docs = Document.objects.filter(site_id=site.id)
        s['doc_count'] = site_docs.count()
        s['zero_count'] = site_docs.filter(isUsed=0).count()

        # Only the lookup itself is guarded. Previously one bare except also
        # wrapped the scoreboard update, so a site whose owner was not in the
        # configured owners list had its index_count reset to 0.
        try:
            indexed = site_doc_count[site.id]
        except (KeyError, IndexError, TypeError):
            s['index_count'] = 0
        else:
            s['index_count'] = indexed
            if site.owner and site.owner in scoreboard:
                scoreboard[site.owner]['site'] += 1
                scoreboard[site.owner]['doc'] += indexed

        site_list.append(s)

    if not site_doc_count:
        # No index counts at all: show unused-document counts per owner.
        scoreboard = (Document.objects.filter(isUsed=0)
                      .values('site__owner')
                      .annotate(zero_count=Count('site__owner')))

    context = {'sites': site_list, 'score': scoreboard}
    return render(request, 'operations/sites.html', context)
Example #3
0
def site(request, site_id):
    """Show, create or update a single site.

    ``site_id`` is compared as a string; ``'0'`` means "new site" (no
    instance is loaded and no statistics are shown).

    * For an existing site, its ``running`` flag is reset to 0 when the
      scrape controller reports no jobs or reports ``'finished'``.
    * On POST the ``SiteForm`` is validated and saved. ``crawl=yes`` in the
      POST data triggers ``crawl`` for the saved site. A newly created site
      redirects to its own page; an invalid form returns a plain response
      listing the form errors.
    * Otherwise the page is rendered with the site form and, for an existing
      site, its document list and counts.
    """
    context = {}
    site = None

    if site_id != '0':
        site = Site.objects.get(pk=site_id)
        # Ask the scrape controller once and reuse the answer for both
        # "no jobs" and "finished"; either one means nothing is running.
        jobs = scraper.scrapeController.get_jobs_for_site(site_id)
        if not jobs or jobs == 'finished':
            site.running = 0
            site.save(update_fields=['running'])

    if request.method == 'POST':
        site_form = SiteForm(request.POST, instance=site)
        if not site_form.is_valid():
            return HttpResponse('Error fields: ' + str(site_form.errors))

        new_site = site_form.save()
        if request.POST.get('crawl') == 'yes':
            crawl(request, new_site.id)

        if site_id == '0':
            return HttpResponseRedirect(reverse('site',
                kwargs={'site_id': new_site.id}))

    if site_id != '0':
        es = ESController()
        # Reload so the page reflects whatever the POST handling above
        # may have written for this site.
        site = Site.objects.get(pk=site_id)
        d = (Document.objects.filter(site_id=site_id)
                .values('id', 'urlAddress', 'isUsed')
                .order_by('isUsed', 'urlAddress'))

        context.update({
            'doc_count': d.count(),
            'zero_count': Document.objects.filter(site_id=site_id).filter(isUsed=0).count(),
            'docs': d,
            'running': site.running})

        # The two lookups below are optional extras: the page is rendered
        # without them when they fail. Only ordinary errors are swallowed,
        # so SystemExit/KeyboardInterrupt still propagate.
        try:
            context.update({'index_count': es.get_document_count_for_site_id(site_id)})
        except Exception:
            pass

        try:
            context.update({'jobid': site.jobid,
                            'instance_ip': aws.ec2.getInstanceFromInstanceName(site.instance).ip_address})
        except Exception:
            pass

    site_form = SiteForm(instance=site)
    context.update({
            'site_id': site_id,
            'site_form': site_form})

    return render(request, 'operations/site.html', context)
Example #4
0
def document(request, doc_id):
    """Render the inspection page for one stored document.

    Template context:
      * ``html`` -- the body HTML returned by ``ESController.get_body_html``,
        HTML-escaped, with every line wrapped in its own ``<code>`` element.
      * ``parsed_text`` -- result of ``ESController.text_parse`` for the
        document.
      * ``parsed_title`` -- result of ``ESController.title_parse`` for the
        document's HTML.

    ``Document.objects.get`` raises if no document has primary key
    ``doc_id``.
    """
    es = ESController()
    doc = Document.objects.get(pk=doc_id)

    escaped_body = cgi.escape(es.get_body_html(doc.document_html))
    # Closing and reopening <code> at each newline puts every source line
    # into its own element.
    code_lines = escaped_body.replace('\n', '</code>\n<code>')

    context = {
        'html': '<code>' + code_lines + '</code>',
        'parsed_text': es.text_parse(doc),
        'parsed_title': es.title_parse(doc.document_html),
    }

    return render(request, 'operations/document.html', context)
Example #5
0
def navbar_inclusion():
    """Build the context dict used by the navigation bar.

    Keys:
      * ``es_count`` -- value of ``ESController.get_document_count()``.
      * ``crawlers`` -- one ``{'url': ip_address, 'name': id}`` dict per
        instance returned by ``aws.ec2.getCrawlerInstances()``.
      * ``zero_count`` -- number of ``Document`` rows with ``isUsed=0``.
    """
    controller = ESController()

    crawlers = []
    for instance in aws.ec2.getCrawlerInstances():
        crawlers.append({'url': instance.ip_address, 'name': instance.id})

    navbar = {}
    navbar["es_count"] = controller.get_document_count()
    navbar['crawlers'] = crawlers
    navbar['zero_count'] = Document.objects.filter(isUsed=0).count()
    return navbar
Example #6
0
def es_remove_site_from_index(request, site_id):
    """Delete a site via ``ESController.delete_site_id_from_es`` and go back.

    After the deletion the client is redirected to the page it came from,
    taken from the ``Referer`` header. Browsers and proxies may omit that
    header, in which case the redirect goes to ``/`` instead of handing
    ``None`` to ``HttpResponseRedirect``.
    """
    es = ESController()
    es.delete_site_id_from_es(site_id)
    return HttpResponseRedirect(request.META.get('HTTP_REFERER') or '/')
Example #7
0
def index_process(site_id):
    """Run ``ESController.index_site_id`` for *site_id* on a new controller.

    Nothing is returned; the call is made purely for its side effects.
    """
    ESController().index_site_id(site_id)