def celery_nlp_new_collection(request):
    """
    Create a new collection (get documents via Solr requests) and start NLP.
    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request
    @rtype: django.http.HttpResponse
    @return: Rendered feedback template
    """
    if request.method == 'POST':
        try:
            logger.debug('Received POST: %s', request.POST)
            package_id = request.POST['package_id']
            content_type = request.POST['content_type']
            tar_path = request.POST['tar_path']
            # build the Solr query from the package identifier and content type
            solr_query = build_query(package_id, content_type)
            logger.debug('Solr query: %s', solr_query)
            ner_model = request.POST['ner_model']
            datamining_main = DMMainTask()
            taskid = str(uuid.uuid4())
            details = {
                'solr_query': solr_query,
                'ner_model': ner_model,
                'tar_path': tar_path
            }
            logger.debug('Task details: %s', details)
            t_context = DefaultTaskContext('', '', 'workers.tasks.DMMainTask', None, '', None)
            # pass the details as kwargs so they can be seen in Celery Flower
            datamining_main.apply_async((t_context, ), kwargs=details, queue='default', task_id=taskid)
        except Exception, e:
            logger.debug(e.message)
            # display error message to user
            template = loader.get_template('datamining/feedback.html')
            context = RequestContext(request, {'status': 'An error occurred: %s' % e.message})
            return HttpResponse(template.render(context))
    template = loader.get_template('datamining/feedback.html')
    context = RequestContext(request, {'status': 'NLP processing has been initiated.'})
    return HttpResponse(template.render(context))
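# A minimal, illustrative sketch of how this view could be exercised with Django's
# test client. The URL path and all form values below are assumptions for
# illustration, not part of this module; adapt them to the project's urls.py.
def _demo_post_new_collection():
    from django.test import Client
    client = Client()
    response = client.post('/datamining/nlp/new/', {      # hypothetical URL
        'package_id': 'urn:uuid:0f9a2d1c-example',        # hypothetical package identifier
        'content_type': 'text/plain',                     # hypothetical content type filter
        'tar_path': '/var/data/earkweb/nlp/input.tar',    # hypothetical tar location
        'ner_model': 'en-ner-person.bin',                 # hypothetical NER model name
    })
    # on success, the rendered datamining/feedback.html reports that NLP was initiated
    print response.status_code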
def initialize_dip(dip_creation_process_name):
    """
    Create a DIP record and its working information package, then reset it via the AIPtoDIPReset task.
    @type dip_creation_process_name: str
    @param dip_creation_process_name: Name of the DIP creation process
    """
    DIP.objects.create(name=dip_creation_process_name)
    ip_uuid = getUniqueID()  # renamed from 'uuid' to avoid shadowing the uuid module
    wf = WorkflowModules.objects.get(identifier=AIPtoDIPReset.__name__)
    InformationPackage.objects.create(
        path=os.path.join(config_path_work, ip_uuid), uuid=ip_uuid, statusprocess=0,
        packagename=dip_creation_process_name, last_task=wf)
    work_dir = os.path.join(config_path_work, ip_uuid)
    task_context = DefaultTaskContext(
        ip_uuid, work_dir, 'AIPtoDIPReset', None,
        {'packagename': dip_creation_process_name}, None)
    AIPtoDIPReset().apply((task_context, ), queue='default')
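# Illustrative sketch only: invoking initialize_dip, e.g. from a Django shell.
# The process name is an arbitrary example value.
def _demo_initialize_dip():
    initialize_dip('dip-creation-example')
    # afterwards a DIP and an InformationPackage record exist, the work directory
    # <config_path_work>/<uuid> is assigned, and AIPtoDIPReset has run synchronously
    # (apply() blocks until the task finishes, unlike apply_async())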
def celery_nlp_existing_collection(request):
    """
    Use an existing document collection to perform NLP on.
    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request
    @rtype: django.http.HttpResponse
    @return: Rendered feedback template
    """
    if request.method == 'POST':
        try:
            logger.debug('Received POST: %s', request.POST)
            tar_path = request.POST['tar_path']
            ner_model = request.POST['ner_model']
            datamining_main = DMMainTask()
            taskid = str(uuid.uuid4())
            details = {
                'ner_model': ner_model,
                'tar_path': tar_path
            }
            logger.debug('Task details: %s', details)
            t_context = DefaultTaskContext('', '', 'workers.tasks.DMMainTask', None, '', None)
            # pass the details as kwargs so they can be seen in Celery Flower
            datamining_main.apply_async((t_context, ), kwargs=details, queue='default', task_id=taskid)
        except Exception, e:
            logger.debug(e.message)
            # display error message to user
            template = loader.get_template('datamining/feedback.html')
            context = RequestContext(request, {'status': 'An error occurred: %s' % e.message})
            return HttpResponse(template.render(context))
    template = loader.get_template('datamining/feedback.html')
    context = RequestContext(request, {'status': 'NLP processing has been initiated.'})
    return HttpResponse(template.render(context))
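# The same endpoint can also be called from outside Django, sketched here with the
# 'requests' library. The URL is an assumption, and CSRF handling is omitted (a
# real client would need a CSRF token unless the view is csrf_exempt).
def _demo_post_existing_collection():
    import requests
    resp = requests.post('http://localhost:8000/datamining/nlp/existing/', data={
        'tar_path': '/var/data/earkweb/nlp/collection.tar',  # hypothetical path
        'ner_model': 'en-ner-person.bin',                    # hypothetical model
    })
    print resp.status_code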
def execute_chain(request):
    """
    Execute the selected tasks as a Celery chain using the selected information package as input.
    Task modules are registered in WorkflowModules. The identifier of a workflow module corresponds
    with the task's class name. The chain is executed using celery's 'apply_async' method.
    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request
    @rtype: django.http.JsonResponse
    @return: JSON response (task execution metadata)
    """
    data = {"success": False, "errmsg": "Unknown error"}
    try:
        selected_ip = request.POST['selected_ip']
        selected_actions = request.POST['selected_actions']
        if not (selected_ip and selected_actions):
            return JsonResponse({"success": False, "errmsg": "Missing input parameter!"})
        actions = selected_actions.split("+")
        logging.debug(actions)
        # Get the selected information package from the database
        ip = InformationPackage.objects.get(pk=selected_ip)
        try:
            # Get the module description of each task to be executed from the database
            action_classes = []
            task_chain = []
            for act in actions:
                wfm = WorkflowModules.objects.get(pk=act)
                taskClass = getattr(tasks, wfm.identifier)
                logging.debug("Executing task %s" % taskClass.name)
                action_classes.append(taskClass)
                task_chain.append(taskClass().s())
            # Only the first task in the chain receives the initial task context as an argument;
            # every subsequent task receives the context returned by its predecessor.
            task_chain[0] = action_classes[0]().s(DefaultTaskContext(ip.uuid, ip.path, "", None, {}, None))
            job = chain(task_chain).apply_async()
            data = {"success": True, "id": job.id}
        except Exception, err:
            tb = traceback.format_exc()
            logging.error(str(tb))
            return JsonResponse({"success": False, "errmsg": "Workflow module not found"})
    except Exception, err:
        tb = traceback.format_exc()
        logging.error(str(tb))
        data = {"success": False, "errmsg": err.message, "errdetail": str(tb)}
    return JsonResponse(data)
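# Sketch of the chaining mechanism execute_chain relies on, with toy tasks instead
# of the project's workflow modules: in a Celery chain, each task's return value
# becomes the first positional argument of the next task, so only the head
# signature is given the initial context. Broker URL and task bodies are
# illustrative assumptions.
def _demo_chain_mechanics():
    from celery import Celery, chain
    app = Celery('demo', broker='redis://localhost:6379/0')  # assumed broker

    @app.task
    def step_one(context):
        context['done'] = ['step_one']
        return context  # becomes step_two's first argument

    @app.task
    def step_two(context):
        context['done'].append('step_two')
        return context

    # the head signature carries the initial context; the tail is left argument-less,
    # mirroring task_chain[0] = action_classes[0]().s(DefaultTaskContext(...)) above
    return chain(step_one.s({'uuid': 'abc-123'}), step_two.s()).apply_async()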
def apply_task(request):
    """
    Execute the selected task using the selected information package as input.
    Task modules are registered in WorkflowModules. The identifier of a workflow module corresponds
    with the task's class name. The task is executed using celery's 'apply_async' method.
    @type request: django.core.handlers.wsgi.WSGIRequest
    @param request: Request
    @rtype: django.http.JsonResponse
    @return: JSON response (task execution metadata)
    """
    data = {"success": False, "errmsg": "Unknown error"}
    try:
        selected_ip = request.POST['selected_ip']
        selected_action = request.POST['selected_action']
        if not (selected_ip and selected_action):
            return JsonResponse({"success": False, "errmsg": "Missing input parameter!"})
        # Get the module description of the task to be executed from the database
        wfm = WorkflowModules.objects.get(pk=selected_action)
        logging.debug(selected_action)
        # Get the selected information package from the database
        ip = InformationPackage.objects.get(pk=selected_ip)
        if request.is_ajax():
            try:
                # Get the task class from the module identifier
                taskClass = getattr(tasks, wfm.identifier)
                logging.debug("Executing task %s" % taskClass.name)
                # Additional input parameters for the task can be passed through using the
                # 'additional_data' dictionary.
                # IMPORTANT: if you want to use any of these parameters in the finalize()
                # function, the task MUST return: task_context.additional_input
                import json  # deserialize the ip.additional_data JSON string from the database
                additional_data = json.loads(ip.additional_data)
                additional_data['packagename'] = ip.packagename
                if ip.identifier != "":
                    additional_data['identifier'] = ip.identifier
                if wfm.identifier == AIPStore.__name__ or wfm.identifier == AIPIndexing.__name__:
                    additional_data['storage_dest'] = config_path_storage
                    if ip.storage_loc and ip.storage_loc != '':
                        additional_data['identifier'] = ip.identifier
                    logging.debug("Storage destination %s" % additional_data['storage_dest'])
                if wfm.identifier in [DIPAcquireAIPs.__name__, DIPAcquireDependentAIPs.__name__, DIPExtractAIPs.__name__]:
                    dip = DIP.objects.get(name=ip.packagename)
                    selected_aips = {}
                    for aip in dip.aips.all():
                        selected_aips[aip.identifier] = aip.source
                    additional_data['selected_aips'] = selected_aips
                    additional_data['storage_dest'] = config_path_storage
                # on reset, the identifier is removed from the context and from the information package record
                if wfm.identifier == SIPtoAIPReset.__name__ or wfm.identifier == AIPtoDIPReset.__name__:
                    additional_data['identifier'] = ''
                    ip.identifier = ''
                if wfm.identifier == AIPPackageMetsCreation.__name__:
                    additional_data['parent_id'] = ip.parent_identifier
                if wfm.identifier == AIPStore.__name__:
                    additional_data['parent_id'] = ip.parent_identifier
                    # The UUID tells us in which folder the parent AIP's METS file is located - this
                    # only works in the development version and probably not with distributed storage.
                    if len(additional_data['parent_id']) > 0:
                        additional_data['parent_path'] = InformationPackage.objects.get(identifier=ip.parent_identifier)
                    else:
                        additional_data['parent_path'] = ''
                if wfm.identifier == LilyHDFSUpload.__name__:
                    additional_data['storage_loc'] = ip.storage_loc
                # Execute task
                task_context = DefaultTaskContext(ip.uuid, ip.path, taskClass.name, None, additional_data, None)
                job = taskClass().apply_async((task_context, ), queue='default')
                data = {"success": True, "id": job.id}
                # persist changes to the information package object
                ip.save()
            except AttributeError, err:
                errdetail = """The workflow module '%s' does not exist.
It might be necessary to run 'python ./workers/scantasks.py' to register new or renamed tasks.""" % wfm.identifier
                data = {
                    "success": False,
                    "errmsg": "Workflow module '%s' does not exist" % wfm.identifier,
                    "errdetail": errdetail
                }
        else:
            data = {"success": False, "errmsg": "not ajax"}
    except Exception, err:
        tb = traceback.format_exc()
        logging.error(str(tb))
        data = {"success": False, "errmsg": err.message, "errdetail": str(tb)}
    return JsonResponse(data)
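# Because apply_task() rejects requests where request.is_ajax() is False, a caller
# must send the X-Requested-With header. A sketch with Django's test client; the
# URL and primary key values are illustrative assumptions.
def _demo_apply_task_ajax():
    from django.test import Client
    client = Client()
    response = client.post(
        '/earkweb/apply_task/',                        # hypothetical URL
        {'selected_ip': '1', 'selected_action': '2'},  # hypothetical primary keys
        HTTP_X_REQUESTED_WITH='XMLHttpRequest',        # makes request.is_ajax() return True
    )
    print response.content  # JSON, e.g. {"success": true, "id": "<celery task id>"}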