Ejemplo n.º 1
0
def etl(pk):
    """Queue the indexing task that matches a crawler and stamp its import time.

    Looks up the ``Crawler`` whose primary key is ``pk`` and submits exactly
    one task to the ``open_semantic_etl_tasks`` queue with priority 5:

    * ``index_sitemap`` with the crawler's sitemap, when a sitemap is set;
    * ``index_web_crawl`` with the crawler's URI and type, when the crawler
      type is ``"DOMAIN"`` or ``"PATH"``;
    * ``index_web`` with the crawler's URI otherwise.

    Afterwards the crawler is saved with ``last_imported`` set to the time
    at which this function was entered.

    Any exception raised by the lookup or while queueing propagates to the
    caller; in that case ``last_imported`` is not updated.
    """
    # Taken before anything is queued; this is the value stored on the
    # crawler at the end. NOTE(review): datetime.now() without a tz yields a
    # naive timestamp - confirm that matches the model field's expectations.
    started_at = datetime.datetime.now()

    crawler = Crawler.objects.get(pk=pk)

    # Pick the task and its keyword arguments; the task modules are imported
    # lazily, only for the branch that is actually taken.
    if crawler.sitemap:
        from opensemanticetl.tasks import index_sitemap as task
        task_kwargs = {'uri': crawler.sitemap}
    elif crawler.crawler_type in ("DOMAIN", "PATH"):
        from opensemanticetl.tasks import index_web_crawl as task
        task_kwargs = {'uri': crawler.uri, 'crawler_type': crawler.crawler_type}
    else:
        from opensemanticetl.tasks import index_web as task
        task_kwargs = {'uri': crawler.uri}

    task.apply_async(kwargs=task_kwargs,
                     queue='open_semantic_etl_tasks',
                     priority=5)

    # Persist the new timestamp.
    crawler.last_imported = started_at
    crawler.save()
Ejemplo n.º 2
0
def queue_index_web(request):
    """Queue an ``index_web`` task for the URI given in the query string.

    Reads the ``uri`` GET parameter, submits it to ``index_web`` on the
    ``tasks`` queue with priority 5, and responds with the JSON object
    ``{"queue": <task id>}``.

    If the request carries no ``uri`` parameter, or an empty one, nothing is
    queued and an HTTP 400 response with a JSON ``error`` message is
    returned instead of letting the failed lookup escape the view.
    """
    uri = request.GET.get("uri")
    if not uri:
        # Nothing to index: report a client error rather than letting the
        # missing key surface as an unhandled exception.
        return HttpResponse(json.dumps({'error': 'missing required parameter: uri'}),
                            content_type="application/json",
                            status=400)

    result = index_web.apply_async(kwargs={'uri': uri},
                                   queue='tasks',
                                   priority=5)

    return HttpResponse(json.dumps({'queue': result.id}),
                        content_type="application/json")
def queue_index_web(request):
    """Queue an ``index_web`` task for the URI given in the query string.

    Reads the ``uri`` GET parameter, submits it to ``index_web`` on the
    ``open_semantic_etl_tasks`` queue with priority 5, and responds with the
    JSON object ``{"queue": <task id>}``.

    If the request carries no ``uri`` parameter, or an empty one, nothing is
    queued and an HTTP 400 response with a JSON ``error`` message is
    returned instead of letting the failed lookup escape the view.
    """
    uri = request.GET.get("uri")
    if not uri:
        # Nothing to index: report a client error rather than letting the
        # missing key surface as an unhandled exception.
        return HttpResponse(json.dumps({'error': 'missing required parameter: uri'}),
                            content_type="application/json",
                            status=400)

    # Imported lazily, only once there is actually something to queue.
    from opensemanticetl.tasks import index_web

    result = index_web.apply_async(kwargs={'uri': uri},
                                   queue='open_semantic_etl_tasks',
                                   priority=5)

    return HttpResponse(json.dumps({'queue': result.id}),
                        content_type="application/json")