Example #1
0
def crawl(request, pk):
    """Queue one registered file or directory for indexing.

    Looks up the ``Files`` record whose primary key is ``pk``, submits its
    ``uri`` to the ``index_filedirectory`` task (queue ``tasks``,
    priority 5), stores the queueing time in ``last_imported`` and renders
    ``files/files_crawl.html`` with the record id.

    No handling is done for an unknown ``pk``; whatever
    ``Files.objects.get`` raises propagates to the caller.
    """
    # Function-scope import so the view brings its own clock into scope.
    from django.utils import timezone

    entry = Files.objects.get(pk=pk)

    # Use Django's clock instead of datetime.datetime.now(): it yields an
    # aware datetime when USE_TZ is enabled and a naive local one otherwise,
    # so the stored value stays comparable with the timezone.now() checks
    # made against last_imported when deciding on the next import.
    # The timestamp is taken before queueing, as before.
    queued_at = timezone.now()
    index_filedirectory.apply_async(
        kwargs={'filename': entry.uri},
        queue='tasks',
        priority=5,
    )

    # Remember when this entry was last handed to the queue.
    entry.last_imported = queued_at
    entry.save()

    return render(request, 'files/files_crawl.html', {'id': pk})
Example #2
0
def queue_index_file(request):
    """Queue the file or directory named by the ``uri`` query parameter.

    On success the ``index_filedirectory`` task is submitted (queue
    ``tasks``, priority 5) and the response is the JSON document
    ``{"queue": <task id>}``.

    If ``uri`` is missing or empty, nothing is queued and an HTTP 400
    response with a JSON error body is returned, instead of letting the
    failed key lookup surface as an unhandled exception.
    """
    uri = request.GET.get("uri")

    # A missing/empty uri is a client error; report it rather than crash
    # or queue a task that has nothing to index.
    if not uri:
        return HttpResponse(
            json.dumps({'error': 'Missing required parameter: uri'}),
            content_type="application/json",
            status=400)

    result = index_filedirectory.apply_async(kwargs={'filename': uri},
                                             queue='tasks',
                                             priority=5)

    return HttpResponse(json.dumps({'queue': result.id}),
                        content_type="application/json")
def queue_index_file(request):
    """Queue the file or directory named by the ``uri`` query parameter.

    On success the ``index_filedirectory`` task is submitted (queue
    ``open_semantic_etl_tasks``, priority 5) and the response is the JSON
    document ``{"queue": <task id>}``.

    If ``uri`` is missing or empty, nothing is queued and an HTTP 400
    response with a JSON error body is returned, instead of letting the
    failed key lookup surface as an unhandled exception.
    """
    uri = request.GET.get("uri")

    # A missing/empty uri is a client error; report it rather than crash
    # or queue a task that has nothing to index.
    if not uri:
        return HttpResponse(
            json.dumps({'error': 'Missing required parameter: uri'}),
            content_type="application/json",
            status=400)

    # Imported at call time, as in the original view.
    from opensemanticetl.tasks import index_filedirectory

    result = index_filedirectory.apply_async(kwargs={'filename': uri},
                                             queue='open_semantic_etl_tasks',
                                             priority=5)

    return HttpResponse(json.dumps({'queue': result.id}),
                        content_type="application/json")
Example #4
0
def recrawl(request):
    """Queue every registered file or directory that is due for re-indexing.

    Each ``Files`` record is handled as follows:

    * ``delta`` falsy (e.g. 0): never queued automatically.
    * ``delta`` and ``last_imported`` both set: skipped while
      ``last_imported + delta`` minutes has not been reached. If
      ``last_imported`` lies in the future (e.g. the system clock was
      wrong), the interval check is bypassed and the record is queued.
    * ``delta`` set but no ``last_imported``: queued.

    Queued records are submitted to the ``index_filedirectory`` task
    (queue ``tasks``, priority 5) and their ``last_imported`` is updated.

    Returns a ``text/plain`` response containing a summary line followed
    by the per-record log.
    """
    verbose = True

    log = []
    count = 0
    count_queued = 0

    for entry in Files.objects.all():

        count += 1

        # One clock reading per record, used both for the interval check
        # and for the stored timestamp. timezone.now() replaces the former
        # datetime.datetime.now() stamp so the saved value is of the same
        # kind (aware/naive) as the value it is later compared with.
        now = timezone.now()

        if verbose:
            log.append(
                "Checking delta time of file or directory: {}".format(entry))

        # A delta of 0 disables automatic import.
        add_to_queue = bool(entry.delta)

        if entry.delta and entry.last_imported:

            # Earliest moment the next import is allowed.
            next_import = entry.last_imported + timedelta(minutes=entry.delta)

            # Only hold the record back when the last import is in the
            # past AND the interval has not elapsed yet; a last import in
            # the future skips the interval check.
            if entry.last_imported < now < next_import:
                add_to_queue = False

            if verbose:
                log.append("Last addition to queue: {}".format(
                    entry.last_imported))
                log.append("Next addition to queue: {}".format(next_import))

        if not add_to_queue:
            continue

        if verbose:
            log.append(
                "Adding file or directory to queue: {}".format(entry))

        # Imported at call time, only when something is actually queued.
        from opensemanticetl.tasks import index_filedirectory

        index_filedirectory.apply_async(kwargs={'filename': entry.uri},
                                        queue='tasks',
                                        priority=5)

        # Save the new timestamp.
        entry.last_imported = now
        entry.save()

        count_queued += 1

    #
    # stats / log
    #

    response = "Files or directories to queue: {} of {}".format(
        count_queued, count)

    if log:
        response += "\n\n" + "\n".join(log)

    #
    # return response
    #

    status = HttpResponse(response)
    status["Content-Type"] = "text/plain"
    return status