Example #1
def update_tasks(user=None, task=None):
    """Examines the status of all CondorJob objects. If the status of upstream subtasks and tasks need changing, then this is done.
    If requested, can filter by a specific user or subtask
    """
    
    
    #Step 1: Get a list of running tasks
    #log.debug('Checking running tasks')
    tasks = Task.objects.filter(status='running')
    if user:
        tasks = tasks.filter(user=user)
    if task:
        tasks = tasks.filter(id=task.id)
    
    for task in tasks:
        #Next, get the corresponding running subtasks
        subtasks = Subtask.objects.filter(task=task).filter(status='running')
        log.debug(subtasks)
        for subtask in subtasks:
            #log.debug('Checking subtask status: %s'%subtask.status)
            jobs = CondorJob.objects.filter(subtask=subtask)
            
            
            #Do any of the jobs have an error status? If so, mark the whole task as failed
            errors = jobs.filter(status='E') | jobs.filter(status='H')
            if errors.exists():
                for job in errors:
                    log.debug('Job %d.%d has status %s. Marking task as errored' % (job.subtask.cluster_id, job.process_id, job.status))
                subtask.status = 'error'
                task.status = 'error'
                subtask.finish_time = now()
                subtask.save()
                task.save()
                break
                #TODO: Can we have a more graceful error handling procedure here?
                
            
            #Next, check to see if all the jobs have finished
            finished = jobs.filter(status='F')
            if finished.count() == jobs.count():
                #The subtask has finished!
                log.debug('Task %s, subtask %d: successfully finished. Updating status' % (task.name, subtask.index))
                subtask.status = 'finished'
                subtask.set_run_time() #Set the run time as the sum from the associated jobs
                subtask.set_job_count() #And the number of condor jobs
                subtask.finish_time = now()
                subtask.save()
                    
            else:
                #Something not right. TODO: determine if bad exit status, files not transferred yet, etc., and respond appropriately
                #log.debug('%d jobs still in queue.' % (jobs.count() - finished.count()))
                pass
            
    
        #Now go through the subtasks and submit any that are waiting, provided that their preceding one has finished
        
        subtasks = Subtask.objects.filter(task=task).filter(status='waiting').order_by('index')
        for subtask in subtasks:
            try:
                if subtask.index > 1:
                    previous_subtasks = Subtask.objects.filter(task=task, index=(subtask.index -1))
                    all_previous_subtasks_finished = True
                    for previous_subtask in previous_subtasks:
                        if previous_subtask.status != 'finished':
                            all_previous_subtasks_finished = False
                    if all_previous_subtasks_finished:
                        #We have a new subtask to submit
                        TaskClass = tools.get_task_class(task.task_type)
                        task_instance = TaskClass(task)
                        log.debug('Preparing new subtask %d' % (subtask.index))
                        prepared_subtask = task_instance.prepare_subtask(subtask.index)
                        #If this wasn't a local subtask, submit to condor  
                        if not subtask.local:
                            condor_tools.submit_task(prepared_subtask)
            except Exception as e:
                subtask.status = 'error'
                subtask.set_job_count()
                subtask.set_run_time()
                subtask.finish_time = now()
                subtask.save()
                
                task.status = 'error'
                
                task.set_job_count()
                task.set_run_time()
                task.set_custom_field('error', str(e))
                task.finish_time = now()
                task.save()
                email_tools.send_task_completion_email(task)
                
        #Get the list of subtasks again
        task_subtasks = Subtask.objects.filter(task=task)
        finished = task_subtasks.filter(status='finished').order_by('index')
        if task_subtasks.count() == finished.count():
            task.status = 'finished'
            task.finish_time = now()
            log.debug('Task %s (user %s), all subtasks finished. Marking task as finished.' % (task.name, task.user.username))
            task.set_run_time()
            task.set_job_count()
            #task.trim_condor_jobs() Don't do this, it breaks plugin functionality
            
            task.save()
            email_tools.send_task_completion_email(task)
            
        task.last_update_time = now()
        task.save()
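
A minimal sketch of how update_tasks might be run on a schedule, assuming a Django management command; the module path and import below are illustrative, not part of the original code:

#management/commands/update_tasks.py (hypothetical location)
from django.core.management.base import BaseCommand

#Import path is an assumption; adjust to wherever update_tasks is defined
from tasks.task_tools import update_tasks


class Command(BaseCommand):
    help = 'Poll CondorJob statuses and update subtask/task states'

    def handle(self, *args, **options):
        #With no arguments this scans every running task;
        #pass user=... or task=... to narrow the scan
        update_tasks()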
Example #2
    def post(self, request, *args, **kwargs):
        assert isinstance(request, HttpRequest)
        assert request.META['CONTENT_TYPE'] == 'application/json'
        json_data = request.body
        data = json.loads(json_data)

        pool_id = data['pool_id']
        secret_key = data['secret_key']

        pool = EC2Pool.objects.get(uuid=pool_id)
        assert pool.secret_key == secret_key

        #Get the condor jobs associated with the condor pool which we think are still running
        pool_jobs = CondorJob.objects.filter(subtask__task__condor_pool=pool)
        running_jobs = pool_jobs.filter(queue_status='R')
        idle_jobs = pool_jobs.filter(queue_status='I')
        held_jobs = pool_jobs.filter(queue_status='H')

        queued_jobs = running_jobs | idle_jobs | held_jobs

        for condor_queue_id, queue_status in data['condor_jobs']:
            condor_job = queued_jobs.get(queue_id=condor_queue_id)
            condor_job.queue_status = queue_status
            condor_job.save()

            #Since this job appeared in the list, it's not finished

            queued_jobs = queued_jobs.exclude(id=condor_job.id)

        #Assume that everything left in queued_jobs has finished
        for job in queued_jobs:
            job.queue_status = 'F'
            job.save()

        #Get all subtasks that are running on the pool

        active_subtasks = Subtask.objects.filter(
            task__condor_pool=pool).filter(active=True)

        for subtask in active_subtasks:
            #Look at all the jobs. Are they all finished?
            all_jobs_finished = True
            errors = False

            for job in subtask.condorjob_set.all():
                if job.queue_status == 'H':
                    errors = True
                elif job.queue_status != 'F':
                    all_jobs_finished = False

            if errors:
                print('Error!', file=sys.stderr)
                subtask.active = False
                subtask.status = 'error'
                subtask.save()
                subtask.task.status = 'error'
                subtask.task.save()

            elif all_jobs_finished:
                print('All jobs finished', file=sys.stderr)
                subtask.active = False
                subtask.status = 'finished'
                subtask.save()

                #Is there another subtask to run?
                TaskClass = tools.get_task_class(subtask.task.task_type)

                subtask_count = TaskClass.subtasks

                task_instance = TaskClass(subtask.task)

                if subtask.index < subtask_count:
                    #We have another subtask to run
                    print('Another subtask to run!')

                    task_instance.submit_subtask(subtask.index + 1)

                else:
                    #The task must have finished
                    #Request the transfer of files
                    task_instance.request_file_transfer(
                        subtask.index, 'finished')

        #Finally, add instance alarms to the task if needed:
        try:
            ec2_tools.add_instances_alarms(pool)
        except Exception as e:
            log.exception(e)

        #Construct a json response to send back
        response_data = {'status': 'created'}
        json_response = json.dumps(response_data)

        return HttpResponse(json_response,
                            content_type="application/json",
                            status=201)
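
A hedged sketch of the JSON payload this view expects, reconstructed from the reads above, together with a client-side POST; the endpoint URL and values are illustrative only:

#Hypothetical client call; field names come from the view above
import json
import urllib.request

payload = {
    'pool_id': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',  #EC2Pool.uuid
    'secret_key': 'shared-secret',  #must match pool.secret_key
    'condor_jobs': [
        [101, 'R'],  #[queue_id, queue_status] pairs
        [102, 'I'],
    ],
}
request = urllib.request.Request(
    'https://example.com/api/update_status',  #URL is an assumption
    data=json.dumps(payload).encode('utf-8'),
    headers={'Content-Type': 'application/json'},
)
with urllib.request.urlopen(request) as response:
    assert response.status == 201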
Example #3
    def form_valid(self, form, *args, **kwargs):

        #Check we are authorized to run on this pool
        compute_pool = form.cleaned_data['compute_pool']
        request = self.request

        assert isinstance(compute_pool, CondorPool)
        assert compute_pool.user == request.user

        log.debug('Submitting task to compute pool %s (%s)' %
                  (compute_pool.name, compute_pool.get_pool_type()))

        ########################################################################
        #Process the uploaded copasi file (and other files?)
        ########################################################################

        #Handle uploaded files...
        #Ensure the directory we're adding the file to exists
        if not os.path.exists(settings.STORAGE_DIR):
            os.mkdir(settings.STORAGE_DIR)

        #And the directory for the user
        #Takes the form userid.username
        user_dir = '%d.%s' % (request.user.id, request.user.username)
        user_dir_path = os.path.join(settings.STORAGE_DIR, user_dir)
        if not os.path.exists(user_dir_path):
            os.mkdir(user_dir_path)

        task = Task()
        task.name = form.cleaned_data['name']
        task.condor_pool = form.cleaned_data['compute_pool']
        task.user = request.user
        task.task_type = form.cleaned_data['task_type']

        task.original_model = 'original_model.cps'

        #Get a list of all fields that are only in the task form, and not in the base

        extra_fields = []
        base_form = base.BaseTaskForm
        for field_name in form.fields:
            if field_name not in base_form.base_fields:
                extra_fields.append((field_name, form.fields[field_name]))

        #We have not yet created the directory to hold the files
        directory_created = False
        task.save()  # Save the task so we can get a valid id

        #Save the custom task fields
        for field_name, field_object in extra_fields:
            #TODO: Is the file a zip file? Try unzipping it...
            if isinstance(field_object, forms.FileField) and isinstance(
                    form.cleaned_data[field_name], UploadedFile):
                try:
                    #Create a directory to store the files for the task
                    #This will just be the id of the task
                    task_dir = str(task.id)
                    task_dir_path = os.path.join(user_dir_path, task_dir)

                    if os.path.exists(task_dir_path):
                        os.rename(
                            task_dir_path,
                            task_dir_path + '.old.' + str(datetime.now()))

                    os.mkdir(task_dir_path)
                    directory_created = True

                    data_file = request.FILES[field_name]
                    filename = data_file.name
                    data_destination = os.path.join(task_dir_path, filename)
                    form_tools.handle_uploaded_file(data_file,
                                                    data_destination)

                    #Next, attempt to extract the file
                    #If this fails, assume the file is an ASCII data file, not a zip file
                    try:
                        data_files_list = []
                        z = zipfile.ZipFile(data_destination)
                        #Record the name of each file in the zipfile

                        for name in z.namelist():
                            data_files_list.append(name)

                        z.extractall(task_dir_path)
                    except zipfile.BadZipfile:
                        data_files_list = []
                        #Assume instead that, if not a zip file, the file must be a data file, so leave it be.
                        #Write the name of the data file to data_files_list
                        data_files_list.append(filename)
                    task.set_custom_field('data_files', data_files_list)
                except Exception as e:
                    log.exception(e)
                    error_messages = [
                        'An error occurred while preparing the task data files',
                        str(e),
                    ]
                    form._errors[NON_FIELD_ERRORS] = forms.forms.ErrorList(
                        error_messages)
                    try:
                        shutil.rmtree(task.directory)
                    except Exception:
                        pass
                    try:
                        task.delete()
                    except Exception:
                        pass
                    kwargs['form'] = form
                    return self.form_invalid(*args, **kwargs)

            else:
                task.set_custom_field(field_name,
                                      form.cleaned_data[field_name])

        task.save()

        try:
            if not directory_created:
                #Create a directory to store the files for the task
                #This will just be the id of the task
                task_dir = str(task.id)
                task_dir_path = os.path.join(user_dir_path, task_dir)

                if os.path.exists(task_dir_path):
                    os.rename(task_dir_path,
                              task_dir_path + '.old.' + str(datetime.now()))

                os.mkdir(task_dir_path)

            task.directory = task_dir_path
            task.save()
            #Next we need to create the directory to store the files for the task

            #working_dir = tempfile.mkdtemp(dir=settings.STORAGE_DIR)
            model_file = request.FILES['model_file']

            full_filename = os.path.join(task_dir_path, task.original_model)

            form_tools.handle_uploaded_file(model_file, full_filename)

            TaskClass = tools.get_task_class(form.cleaned_data['task_type'])

            task_instance = TaskClass(task)
        except Exception as e:
            log.exception(e)
            error_messages = [
                'An error occurred while preparing the task model file',
                str(e),
            ]
            form._errors[NON_FIELD_ERRORS] = forms.forms.ErrorList(
                error_messages)
            try:
                shutil.rmtree(task.directory)
            except Exception:
                pass
            try:
                task.delete()
            except Exception:
                pass
            kwargs['form'] = form
            return self.form_invalid(*args, **kwargs)

        #Validate the task
        valid = task_instance.validate()
        if valid is not True:
            #If not valid, the error message is contained in valid
            error_messages = [
                'Model file is not valid for the current task type',
                str(valid),
            ]
            form._errors[NON_FIELD_ERRORS] = forms.forms.ErrorList(
                error_messages)
            shutil.rmtree(task.directory)
            task.delete()
            kwargs['form'] = form

            return self.form_invalid(*args, **kwargs)

        try:
            task_instance.initialize_subtasks()

            subtask = task_instance.prepare_subtask(1)

            condor_tools.submit_task(subtask)

            task.status = 'running'
            task.save()
        except Exception as e:
            log.exception(e)
            error_messages = [
                'An error occurred while preparing the subtask',
                str(e),
            ]
            form._errors[NON_FIELD_ERRORS] = forms.forms.ErrorList(
                error_messages)
            try:
                shutil.rmtree(task.directory)
            except Exception:
                pass
            try:
                task.delete()
            except Exception:
                pass
            kwargs['form'] = form
            return self.form_invalid(*args, **kwargs)

        return HttpResponseRedirect(
            reverse_lazy('task_details', kwargs={'task_id': task.id}))
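
form_tools.handle_uploaded_file is called above but not shown in this excerpt; a typical chunked-write implementation, offered only as an assumption about its behaviour, would be:

#Hypothetical implementation of form_tools.handle_uploaded_file;
#the real project code may differ
def handle_uploaded_file(uploaded_file, destination_path):
    """Stream a Django UploadedFile to disk in chunks so large
    uploads are never loaded into memory in one piece."""
    with open(destination_path, 'wb') as destination:
        for chunk in uploaded_file.chunks():
            destination.write(chunk)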
Example #4
def update_tasks(user=None, task=None):
    """Examines the status of all CondorJob objects. If the status of upstream subtasks and tasks need changing, then this is done.
    If requested, can filter by a specific user or subtask
    """

    #Step 1: Get a list of running tasks
    #log.debug('Checking running tasks')
    tasks = Task.objects.filter(status='running')
    if user:
        tasks = tasks.filter(user=user)
    if task:
        tasks = tasks.filter(id=task.id)

    for task in tasks:
        #Next, get the corresponding running subtasks
        subtasks = Subtask.objects.filter(task=task).filter(status='running')
        log.debug(subtasks)
        for subtask in subtasks:
            #log.debug('Checking subtask status: %s'%subtask.status)
            jobs = CondorJob.objects.filter(subtask=subtask)

            #Do any of the jobs have an error status? If so, mark the whole task as failed
            errors = jobs.filter(status='E') | jobs.filter(status='H')
            if errors.exists():
                for job in errors:
                    log.debug(
                        'Job %d.%d has status %s. Marking task as errored' %
                        (job.subtask.cluster_id, job.process_id, job.status))
                subtask.status = 'error'
                task.status = 'error'
                subtask.finish_time = now()
                subtask.save()
                task.save()
                break
                #TODO: Can we have a more graceful error handling procedure here?

            #Next, check to see if all the jobs have finished
            finished = jobs.filter(status='F')
            if finished.count() == jobs.count():
                #The subtask has finished!
                log.debug(
                    'Task %s, subtask %d: successfully finished. Updating status'
                    % (task.name, subtask.index))
                subtask.status = 'finished'
                #Set the run time as the sum from the associated jobs
                subtask.set_run_time()
                #And record the number of condor jobs
                subtask.set_job_count()
                subtask.finish_time = now()
                subtask.save()

            else:
                #Something not right. TODO: determine if bad exit status, files not transferred yet, etc., and respond appropriately
                #log.debug('%d jobs still in queue.' % (jobs.count() - finished.count()))
                pass

        #Now go through the subtasks and submit any that are waiting, provided that their preceding one has finished

        subtasks = Subtask.objects.filter(task=task).filter(
            status='waiting').order_by('index')
        for subtask in subtasks:
            try:
                if subtask.index > 1:
                    previous_subtasks = Subtask.objects.filter(
                        task=task, index=(subtask.index - 1))
                    all_previous_subtasks_finished = True
                    for previous_subtask in previous_subtasks:
                        if previous_subtask.status != 'finished':
                            all_previous_subtasks_finished = False
                    if all_previous_subtasks_finished:
                        #We have a new subtask to submit
                        TaskClass = tools.get_task_class(task.task_type)
                        task_instance = TaskClass(task)
                        log.debug('Preparing new subtask %d' % (subtask.index))
                        prepared_subtask = task_instance.prepare_subtask(
                            subtask.index)
                        #If this wasn't a local subtask, submit to condor
                        if not subtask.local:
                            condor_tools.submit_task(prepared_subtask)
            except Exception as e:
                subtask.status = 'error'
                subtask.set_job_count()
                subtask.set_run_time()
                subtask.finish_time = now()
                subtask.save()

                task.status = 'error'

                task.set_job_count()
                task.set_run_time()
                task.set_custom_field('error', str(e))
                task.finish_time = now()
                task.save()
                email_tools.send_task_completion_email(task)

        #Get the list of subtasks again
        task_subtasks = Subtask.objects.filter(task=task)
        finished = task_subtasks.filter(status='finished').order_by('index')
        if task_subtasks.count() == finished.count():
            task.status = 'finished'
            task.finish_time = now()
            log.debug(
                'Task %s (user %s), all subtasks finished. Marking task as finished.'
                % (task.name, task.user.username))
            task.set_run_time()
            task.set_job_count()
            #task.trim_condor_jobs() Don't do this, it breaks plugin functionality

            task.save()
            email_tools.send_task_completion_email(task)

        task.last_update_time = now()
        task.save()
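
The manual flag loop above that checks whether every preceding subtask has finished could be collapsed into a single queryset test; a sketch of the equivalent logic (same semantics, not the project's actual code):

#Sketch: dependency check via exclude()/exists() instead of a flag loop
def all_previous_subtasks_finished(task, subtask_index):
    unfinished = Subtask.objects.filter(
        task=task, index=subtask_index - 1).exclude(status='finished')
    return not unfinished.exists()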
Example #5
    def post(self, request, *args, **kwargs):
        assert isinstance(request, HttpRequest)
        assert request.META['CONTENT_TYPE'] == 'application/json'
        json_data = request.body
        data = json.loads(json_data)

        pool_id = data['pool_id']
        secret_key = data['secret_key']

        pool = EC2Pool.objects.get(uuid=pool_id)
        assert pool.secret_key == secret_key

        #Get the condor jobs associated with the condor pool which we think are still running
        pool_jobs = CondorJob.objects.filter(subtask__task__condor_pool=pool)
        running_jobs = pool_jobs.filter(queue_status='R')
        idle_jobs = pool_jobs.filter(queue_status='I')
        held_jobs = pool_jobs.filter(queue_status='H')
        
        queued_jobs = running_jobs | idle_jobs | held_jobs
        
        
        for condor_queue_id, queue_status in data['condor_jobs']:
            condor_job = queued_jobs.get(queue_id=condor_queue_id)
            condor_job.queue_status = queue_status
            condor_job.save()
            
            #Since this job appeared in the list, it's not finished
            
            queued_jobs = queued_jobs.exclude(id=condor_job.id)
        
        #Assume that everything left in queued_jobs has finished
        for job in queued_jobs:
            job.queue_status = 'F'
            job.save()

        
        #Get all subtasks that are running on the pool
        
        active_subtasks = Subtask.objects.filter(task__condor_pool=pool).filter(active=True)
        
        for subtask in active_subtasks:
            #Look at all the jobs. Are they all finished?
            all_jobs_finished = True
            errors = False
            
            for job in subtask.condorjob_set.all():
                if job.queue_status == 'H':
                    errors = True
                elif job.queue_status != 'F':
                    all_jobs_finished = False
            
            if errors:
                print('Error!', file=sys.stderr)
                subtask.active = False
                subtask.status = 'error'
                subtask.save()
                subtask.task.status = 'error'
                subtask.task.save()
                
            elif all_jobs_finished:
                print('All jobs finished', file=sys.stderr)
                subtask.active = False
                subtask.status = 'finished'
                subtask.save()
                
                
                #Is there another subtask to run?
                TaskClass = tools.get_task_class(subtask.task.task_type)
                
                subtask_count = TaskClass.subtasks
                
                task_instance = TaskClass(subtask.task)
                
                if subtask.index < subtask_count:
                    #We have another subtask to run
                    print('Another subtask to run!')
                    
                    task_instance.submit_subtask(subtask.index + 1)
                    
                else:
                    #The task must have finished
                    #Request the transfer of files
                    task_instance.request_file_transfer(subtask.index, 'finished')
                    
        
        #Finally, add instance alarms to the task if needed:
        try:
            ec2_tools.add_instances_alarms(pool)
        except Exception as e:
            log.exception(e)
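
As an aside, the loop above that marks leftover jobs as finished issues one UPDATE per row; Django can do this in a single query, with the caveat that QuerySet.update() bypasses any custom save() logic and model signals on CondorJob (a sketch, not a drop-in change):

#Single-query alternative to the per-job 'F' loop;
#note update() skips CondorJob.save() and post_save signals
queued_jobs.update(queue_status='F')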