Code example #1
File: runs.py Project: afcarl/loom
 def send_notifications(self):
     context = self.notification_context
     if not context:
         context = {}
     server_url = context.get('server_url')
     context.update({
         'run_url': '%s/#/runs/%s/' % (server_url, self.uuid),
         'run_api_url': '%s/api/runs/%s/' % (server_url, self.uuid),
         'run_status': self.status,
         'run_name_and_id': '%s@%s' % (self.name, self.uuid[0:8])
     })
     notification_addresses = []
     if self.notification_addresses:
         notification_addresses = self.notification_addresses
     if get_setting('NOTIFICATION_ADDRESSES'):
         notification_addresses = notification_addresses\
                                  + get_setting('NOTIFICATION_ADDRESSES')
     email_addresses = filter(lambda x: '@' in x, notification_addresses)
     urls = filter(lambda x: '@' not in x, notification_addresses)
     self._send_email_notifications(email_addresses, context)
     self._send_http_notifications(urls, context)
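Every example on this page revolves around a get_setting helper, whose definition is not included in these excerpts. As a rough sketch only, such helpers in Django projects are commonly thin wrappers over django.conf.settings that fail loudly when a required key is missing (a hypothetical reconstruction, not Loom's actual code):

def get_setting(name, required=True):
    # Hypothetical sketch, NOT Loom's implementation: look up a named
    # value in Django settings and raise if a required one is missing.
    from django.conf import settings
    value = getattr(settings, name, None)
    if value is None and required:
        raise Exception('Missing required setting "%s"' % name)
    return value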
Code example #2
File: async.py Project: StanfordBioinformatics/loom
def _run_cleanup_task_attempt_playbook(task_attempt):
    env = copy.copy(os.environ)
    playbook = os.path.join(
        get_setting('PLAYBOOK_PATH'),
        get_setting('CLEANUP_TASK_ATTEMPT_PLAYBOOK'))
    cmd_list = ['ansible-playbook',
                '-i', get_setting('ANSIBLE_INVENTORY'),
                playbook,
                # Without this, ansible uses /usr/bin/python,
                # which may be missing needed modules
                '-e', 'ansible_python_interpreter="/usr/bin/env python"',
    ]

    if get_setting('DEBUG'):
        cmd_list.append('-vvvv')

    new_vars = {'LOOM_TASK_ATTEMPT_ID': str(task_attempt.uuid),
                'LOOM_TASK_ATTEMPT_STEP_NAME': task_attempt.name
                }
    env.update(new_vars)

    p = subprocess.Popen(
        cmd_list, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    terminal_output, _ = p.communicate()  # stderr is merged into stdout
    if p.returncode != 0:
        msg = 'Cleanup failed for task_attempt.uuid="%s" with returncode="%s".' % (
            task_attempt.uuid, p.returncode)
        logger.error(msg)
        task_attempt.add_event(msg,
                               detail=terminal_output,
                               is_error=True)
        raise Exception(msg)
Code example #3
def _run_cleanup_task_playbook(task_attempt):
    env = copy.copy(os.environ)
    playbook = os.path.join(get_setting('PLAYBOOK_PATH'),
                            get_setting('CLEANUP_TASK_ATTEMPT_PLAYBOOK'))
    cmd_list = [
        'ansible-playbook',
        '-i',
        get_setting('ANSIBLE_INVENTORY'),
        playbook,
        # Without this, ansible uses /usr/bin/python,
        # which may be missing needed modules
        '-e',
        'ansible_python_interpreter="/usr/bin/env python"',
    ]

    if get_setting('DEBUG'):
        cmd_list.append('-vvvv')

    new_vars = {
        'LOOM_TASK_ATTEMPT_ID': str(task_attempt.uuid),
        'LOOM_TASK_ATTEMPT_STEP_NAME': task_attempt.task.run.name,
    }
    env.update(new_vars)

    p = subprocess.Popen(cmd_list,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    terminal_output, _ = p.communicate()  # stderr is merged into stdout
    if p.returncode != 0:
        msg = 'Cleanup failed for task_attempt.uuid="%s" with returncode="%s".' % (
            task_attempt.uuid, p.returncode)
        logger.error(msg)
        task_attempt.add_event(msg, detail=terminal_output, is_error=True)
        raise Exception(msg)
Code example #4
 def is_unresponsive(self):
     heartbeat = int(get_setting('TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'))
     timeout = int(get_setting('TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS'))
     try:
         last_heartbeat = self.task_attempt.last_heartbeat
     except AttributeError:
         # No TaskAttempt selected
         last_heartbeat = self.datetime_created
     return (timezone.now() - last_heartbeat).total_seconds() > timeout
Code example #5
 def cleanup(self):
     if self.status_is_cleaned_up:
         return
     if get_setting('PRESERVE_ALL'):
         self.add_event('Skipped cleanup because PRESERVE_ALL is True',
                        is_error=False)
         return
     if get_setting('PRESERVE_ON_FAILURE') and self.status_is_failed:
         self.add_event('Skipped cleanup because PRESERVE_ON_FAILURE is True',
                        is_error=False)
         return
     async.execute(async.cleanup_task_attempt, self.uuid)
Code example #6
 def _add_url_prefix(cls, path):
     if not path.startswith('/'):
         raise ValidationError(
             'Expected an absolute path but got path="%s"' % path)
     storage_type = get_setting('STORAGE_TYPE')
     if storage_type.lower() == 'local':
         return 'file://' + path
     elif storage_type.lower() == 'google_storage':
         return 'gs://' + get_setting('GOOGLE_STORAGE_BUCKET') + path
     else:
         raise ValidationError(
             'Couldn\'t recognize value for setting STORAGE_TYPE="%s"'\
             % storage_type)
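For illustration, here is how the prefixing above behaves under each storage type (hypothetical setting values, not taken from a real deployment):

# Hypothetical values, illustrating _add_url_prefix:
# STORAGE_TYPE='local':
#     _add_url_prefix('/data/file.txt')  ->  'file:///data/file.txt'
# STORAGE_TYPE='google_storage', GOOGLE_STORAGE_BUCKET='my-bucket':
#     _add_url_prefix('/data/file.txt')  ->  'gs://my-bucket/data/file.txt'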
Code example #7
File: data_objects.py Project: yqwu1983/loom
 def _add_url_prefix(cls, path):
     if not path.startswith('/'):
         raise ValidationError(
             'Expected an absolute path but got path="%s"' % path)
     storage_type = get_setting('STORAGE_TYPE')
     if storage_type.lower() == 'local':
         return 'file://' + path
     elif storage_type.lower() == 'google_storage':
         return 'gs://' + get_setting('GOOGLE_STORAGE_BUCKET') + path
     else:
         raise ValidationError(
             'Couldn\'t recognize value for setting STORAGE_TYPE="%s"'\
             % storage_type)
Code example #8
File: task_attempts.py Project: afcarl/loom
 def cleanup(self):
     if self.status_is_cleaned_up:
         return
     if get_setting('PRESERVE_ALL'):
         self.add_event('Skipped cleanup because PRESERVE_ALL is True',
                        is_error=False)
         return
     if get_setting('PRESERVE_ON_FAILURE') and self.status_is_failed:
         self.add_event(
             'Skipped cleanup because PRESERVE_ON_FAILURE is True',
             is_error=False)
         return
     async.cleanup_task_attempt(self.uuid)
Code example #9
def check_for_missed_cleanup():
    """Check for TaskAttempts that were never cleaned up
    """
    if get_setting('PRESERVE_ALL'):
        return
    from api.models.tasks import TaskAttempt
    if get_setting('PRESERVE_ON_FAILURE'):
        for task_attempt in TaskAttempt.objects.filter(
                status_is_running=False).filter(
                    status_is_cleaned_up=False).exclude(status_is_failed=True):
            task_attempt.cleanup()
    else:
        for task_attempt in TaskAttempt.objects.filter(
                status_is_running=False).filter(status_is_cleaned_up=False):
            task_attempt.cleanup()
Code example #10
File: async.py Project: StanfordBioinformatics/loom
def execute_task(task_uuid, force_rerun=False):
    # If task has been run before, old TaskAttempt will be rendered inactive
    from api.models.tasks import Task
    task = Task.objects.get(uuid=task_uuid)
    # Do not run again if already running
    if task.task_attempt and task.is_responsive() and not task.is_timed_out():
        return

    # Use TaskFingerprint to see if a valid TaskAttempt for this fingerprint
    # already exists, or to flag the new TaskAttempt to be reused by other
    # tasks with this fingerprint
    fingerprint = task.get_fingerprint()

    task_attempt = None
    if not force_rerun:
        # Reuse an existing TaskAttempt if a valid one exists with the same
        # fingerprint. When force_rerun is set, this block is skipped and a
        # new TaskAttempt is always created.
        if fingerprint.active_task_attempt \
           and fingerprint.active_task_attempt.might_succeed():
            task.activate_task_attempt(fingerprint.active_task_attempt)
            return

    task_attempt = task.create_and_activate_task_attempt()
    fingerprint.update_task_attempt_maybe(task_attempt)
    if get_setting('TEST_NO_RUN_TASK_ATTEMPT'):
        return
    return task_attempt.run_with_heartbeats()
Code example #11
File: runs.py Project: StanfordBioinformatics/loom
 def push_all_inputs(self):
     if get_setting('TEST_NO_PUSH_INPUTS'):
         return
     unsaved_tasks = {}
     unsaved_task_inputs = []
     unsaved_task_outputs = []
     unsaved_data_nodes = {}
     for leaf in self.get_leaves():
         if leaf.inputs.exists():
             leaf_outputs = leaf.outputs.all()
             for input_set in InputCalculator(leaf)\
                 .get_input_sets():
                 task, task_inputs, task_outputs, data_nodes \
                     = Task.create_unsaved_task_from_input_set(
                         input_set, leaf, leaf_outputs)
                 if task is None:
                      # Task already exists; nothing to create
                     continue
                 unsaved_tasks[task.uuid] = task
                 unsaved_task_inputs.extend(task_inputs)
                 unsaved_task_outputs.extend(task_outputs)
                 unsaved_data_nodes.update(data_nodes)
         else:
             # Special case: No inputs on leaf node
             task, task_inputs, task_outputs, data_nodes \
                 = Task.create_unsaved_task_from_input_set([], leaf)
             if task is None:
                 continue
             unsaved_tasks[task.uuid] = task
             unsaved_task_inputs.extend(task_inputs)
             unsaved_task_outputs.extend(task_outputs)
             unsaved_data_nodes.update(data_nodes)
     Task.bulk_create_tasks(unsaved_tasks, unsaved_task_inputs,
                            unsaved_task_outputs, unsaved_data_nodes,
                            self.force_rerun)
Code example #12
 def prefetch_list(cls, instances):
     # Since we are prefetching, delete _cached_children to avoid conflicts
     for instance in instances:
         if hasattr(instance, '_cached_children'):
             del instance._cached_children
     instances = list(filter(lambda i: i is not None, instances))
     instances = list(filter(
         lambda i: not hasattr(i, '_prefetched_objects_cache'), instances))
     queryset = DataNode\
                .objects\
                .filter(uuid__in=[i.uuid for i in instances])
     MAXIMUM_TREE_DEPTH = get_setting('MAXIMUM_TREE_DEPTH')
     # Prefetch 'children', 'children__children', etc. up to max depth
     # This incurs 1 query per level up to actual depth.
      # No extra queries incurred if we go too deep.
     for i in range(1, MAXIMUM_TREE_DEPTH+1):
         queryset = queryset.prefetch_related('__'.join(['children']*i))
     # Transfer prefetched children to original instances
     queried_data_nodes_1 = [node for node in queryset]
     copy_prefetch(queried_data_nodes_1, instances)
     # Flatten tree so we can simultaneously prefetch related models on all nodes
     node_list = []
     for instance in instances:
         node_list.extend(flatten_nodes(instance, 'children'))
     queryset = DataNode.objects.filter(uuid__in=[n.uuid for n in node_list])\
         .prefetch_related('data_object')\
         .prefetch_related('data_object__file_resource')
     # Transfer prefetched data to child nodes on original instances
     queried_data_nodes_2 = [data_node for data_node in queryset]
     copy_prefetch(queried_data_nodes_2, instances, child_field='children',
                   one_to_x_fields=['data_object',])
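The copy_prefetch and flatten_nodes helpers are not included in these excerpts. Judging only from the call sites above, flatten_nodes plausibly walks the prefetched tree and returns every node in a flat list; a hypothetical sketch under that assumption:

def flatten_nodes(node, children_field, node_list=None):
    # Hypothetical sketch inferred from usage; not the project's code.
    # Collects the node and all of its descendants into a flat list.
    if node_list is None:
        node_list = []
    node_list.append(node)
    for child in getattr(node, children_field).all():
        flatten_nodes(child, children_field, node_list)
    return node_list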
Code example #13
    def prefetch_list(cls, instances):
        queryset = Template\
                   .objects\
                   .filter(uuid__in=[i.uuid for i in instances])
        MAXIMUM_TREE_DEPTH = get_setting('MAXIMUM_TREE_DEPTH')
        # Prefetch 'children', 'children__children', etc. up to max depth
        # This incurs 1 query per level up to actual depth.
        # No extra queries incurred if we go too deep.
        for i in range(1, MAXIMUM_TREE_DEPTH+1):
            queryset = queryset.prefetch_related('__'.join(['steps']*i))
        # Transfer prefetched steps to original instances
        queried_templates_1 = [template for template in queryset]
        copy_prefetch(queried_templates_1, instances)
        # Flatten tree so we can simultaneously prefetch related models on all nodes
        node_list = []
        for instance in instances:
            node_list.extend(flatten_nodes(instance, 'steps'))
        queryset = Template.objects.filter(uuid__in=[n.uuid for n in node_list])\
            .prefetch_related('inputs')\
            .prefetch_related('inputs__data_node')
        # Transfer prefetched data to child nodes on original instances
        queried_templates_2 = [template for template in queryset]
        copy_prefetch(queried_templates_2, instances, child_field='steps')
        # Prefetch all data nodes
        data_nodes = []
        for instance in instances:
            instance._get_data_nodes(data_nodes=data_nodes)
        DataNode.prefetch_list(data_nodes)
Code example #14
File: runs.py Project: StanfordBioinformatics/loom
 def _send_email_notifications(self, email_addresses, context):
     if not email_addresses:
         return
     try:
         text_content = render_to_string('email/notify_run_completed.txt',
                                         context)
         html_content = render_to_string('email/notify_run_completed.html',
                                         context)
         connection = mail.get_connection()
         connection.open()
         email = mail.EmailMultiAlternatives(
             'Loom run %s@%s is %s' % (
                 self.name, self.uuid[0:8], self.status.lower()),
             text_content,
             get_setting('DEFAULT_FROM_EMAIL'),
             email_addresses,
         )
         email.attach_alternative(html_content, "text/html")
         email.send()
         connection.close()
     except Exception as e:
         self.add_event(
             "Email notifications failed", detail=str(e), is_error=True)
         raise
     self.add_event("Email notifications sent",
                    detail=email_addresses, is_error=False)
Code example #15
File: async.py Project: StanfordBioinformatics/loom
def cleanup_orphaned_task_attempts():
    if get_setting('DISABLE_DELETE'):
        return

    from api.models import TaskAttempt, DataNode
    orphaned_task_attempts = TaskAttempt.objects.filter(
        tasks=None, status_is_initializing=False)
    logger.info('Periodic cleanup of orphaned task attempts. %s found.'
                % orphaned_task_attempts.count())
    nodes_to_delete = set()
    for task_attempt in orphaned_task_attempts:
        input_data_nodes = DataNode.objects.filter(
            taskattemptinput__task_attempt__uuid=task_attempt.uuid)
        output_data_nodes = DataNode.objects.filter(
            taskattemptoutput__task_attempt__uuid=task_attempt.uuid)
        for item in input_data_nodes:
            nodes_to_delete.add(item)
        for item in output_data_nodes:
            nodes_to_delete.add(item)
        task_attempt.delete()
    for item in nodes_to_delete:
        try:
            item.delete()
        except models.ProtectedError:
            pass
Code example #16
File: runs.py Project: StanfordBioinformatics/loom
 def _send_http_notifications(self, urls, context):
     if not urls:
         return
     any_failures = False
     try:
         data = {
             'message': 'Loom run %s is %s' % (
                 context['run_name_and_id'],
                 context['run_status']),
             'run_uuid': self.uuid,
             'run_name': self.name,
             'run_status': self.status,
             'run_url': context['run_url'],
             'run_api_url': context['run_api_url'],
             'server_name': context['server_name'],
             'server_url': context['server_url'],
         }
     except Exception as e:
         self.add_event("Http notification failed", detail=str(e), is_error=True)
         raise
     for url in urls:
         try:
             response = requests.post(
                 url,
                 json=data,
                 verify=get_setting('NOTIFICATION_HTTPS_VERIFY_CERTIFICATE'))
             response.raise_for_status()
         except Exception as e:
             self.add_event("Http notification failed", detail=str(e), is_error=True)
             any_failures = True
     if not any_failures:
         self.add_event("Http notification succeeded", detail=', '.join(urls),
                        is_error=False)
Code example #17
File: runs.py Project: afcarl/loom
 def _send_email_notifications(self, email_addresses, context):
     if not email_addresses:
         return
     try:
         text_content = render_to_string('email/notify_run_completed.txt',
                                         context)
         html_content = render_to_string('email/notify_run_completed.html',
                                         context)
         connection = mail.get_connection()
         connection.open()
         email = mail.EmailMultiAlternatives(
             'Loom run %s@%s is %s' %
             (self.name, self.uuid[0:8], self.status.lower()),
             text_content,
             get_setting('DEFAULT_FROM_EMAIL'),
             email_addresses,
         )
         email.attach_alternative(html_content, "text/html")
         email.send()
         connection.close()
     except Exception as e:
         self.add_event("Email notifications failed",
                        detail=str(e),
                        is_error=True)
         raise
     self.add_event("Email notifications sent",
                    detail=email_addresses,
                    is_error=False)
Code example #18
File: async.py Project: StanfordBioinformatics/loom
def clear_expired_logs():
    import elasticsearch
    import curator
    elasticsearch_host = get_setting('ELASTICSEARCH_HOST')
    elasticsearch_port = get_setting('ELASTICSEARCH_PORT')
    elasticsearch_log_expiration_days = get_setting('ELASTICSEARCH_LOG_EXPIRATION_DAYS')
    client = elasticsearch.Elasticsearch([elasticsearch_host], port=elasticsearch_port)
    ilo = curator.IndexList(client)
    ilo.filter_by_regex(kind='prefix', value='logstash-')
    ilo.filter_by_age(source='name', direction='older', timestring='%Y.%m.%d',
                      unit='days', unit_count=elasticsearch_log_expiration_days)
    delete_indices = curator.DeleteIndices(ilo)
    try:
        delete_indices.do_action()
    except curator.exceptions.NoIndices:
        pass
Code example #19
File: views.py Project: StanfordBioinformatics/loom
 def destroy(self, *args, **kwargs):
     if get_setting('DISABLE_DELETE'):
         return JsonResponse({
             'message': 'Delete is forbidden because DISABLE_DELETE is True.'},
                             status=403)
     else:
         return super(ProtectedDeleteModelViewSet, self).destroy(*args, **kwargs)
Code example #20
File: async.py Project: StanfordBioinformatics/loom
def check_for_missed_cleanup():
    """Check for TaskAttempts that were never cleaned up
    """
    if get_setting('PRESERVE_ALL'):
        return
    from api.models.tasks import TaskAttempt
    if get_setting('PRESERVE_ON_FAILURE'):
        for task_attempt in TaskAttempt.objects.filter(
                status_is_running=False).filter(
                    status_is_cleaned_up=False).exclude(
                        status_is_failed=True):
            task_attempt.cleanup()
    else:
        for task_attempt in TaskAttempt.objects.filter(
                status_is_running=False).filter(status_is_cleaned_up=False):
            task_attempt.cleanup()
Code example #21
File: views.py Project: StanfordBioinformatics/loom
def auth_status(request):
    if get_setting('LOGIN_REQUIRED') == False:
        return JsonResponse({'message': 'Authentication not required'})
    elif request.user.is_authenticated():
        return JsonResponse({
            'message': 'User is authenticated as %s' % request.user.username})
    else:
        return JsonResponse({'message': 'User is not authenticated'}, status=401)
Code example #22
 def system_error(self, detail=''):
     self._process_error(
         detail,
         get_setting('MAXIMUM_RETRIES_FOR_SYSTEM_FAILURE'),
         'system_failure_count',
         'System error',
         exponential_delay=True,
     )
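_process_error is not shown in these excerpts. Presumably it increments the named failure counter, records the event, and retries up to the given maximum; when exponential_delay is set, the retry delay would grow with the failure count. A purely illustrative delay calculation under that assumption:

def _exponential_delay(failure_count, base_seconds=1, max_seconds=300):
    # Illustrative only, not Loom's code: the delay doubles with each
    # failure and is capped at a maximum.
    return min(base_seconds * 2 ** failure_count, max_seconds)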
Code example #23
File: runs.py Project: afcarl/loom
 def get_notification_context(cls, request):
     context = {'server_name': get_setting('SERVER_NAME')}
     if request:
         context.update({
             'server_url':
             '%s://%s' % (request.scheme, request.get_host()),
         })
     return context
Code example #24
def auth_status(request):
    if get_setting('LOGIN_REQUIRED') == False:
        return JsonResponse({'message': 'Authentication not required'})
    elif request.user.is_authenticated():
        return JsonResponse(
            {'message': 'User is authenticated as %s' % request.user.username})
    else:
        return JsonResponse({'message': 'User is not authenticated'},
                            status=401)
Code example #25
File: runs.py Project: afcarl/loom
 def _push_all_inputs(self):
     if get_setting('TEST_NO_PUSH_INPUTS_ON_RUN_CREATION'):
         return
     if self.inputs.exists():
         for input in self.inputs.all():
             self.push(input.channel, [])
     elif self.is_leaf:
         # Special case: No inputs on leaf node
         self._push_input_set([])
Code example #26
def clear_expired_logs():
    import elasticsearch
    import curator
    elasticsearch_host = get_setting('ELASTICSEARCH_HOST')
    elasticsearch_port = get_setting('ELASTICSEARCH_PORT')
    elasticsearch_log_expiration_days = get_setting(
        'ELASTICSEARCH_LOG_EXPIRATION_DAYS')
    client = elasticsearch.Elasticsearch([elasticsearch_host],
                                         port=elasticsearch_port)
    ilo = curator.IndexList(client)
    ilo.filter_by_regex(kind='prefix', value='logstash-')
    ilo.filter_by_age(source='name',
                      direction='older',
                      timestring='%Y.%m.%d',
                      unit='days',
                      unit_count=elasticsearch_log_expiration_days)
    delete_indices = curator.DeleteIndices(ilo)
    try:
        delete_indices.do_action()
    except curator.exceptions.NoIndices:
        # No indices matched the filters; nothing to delete
        pass
Code example #27
File: async.py Project: StanfordBioinformatics/loom
def cleanup_orphaned_file_resources():
    if get_setting('DISABLE_DELETE'):
        return

    from api.models import FileResource
    queryset = FileResource.objects.filter(data_object__isnull=True)
    count = queryset.count()
    logger.info('Periodic cleanup of unused files. %s files found.' % count)
    for file_resource in queryset.all():
        _delete_file_resource(file_resource.id)
Code example #28
File: views.py Project: migush/elements
def update_source(request):
    source = request.POST.get('source')
    if source:
        try:
            URLValidator()(source)
            api.save_setting(source)
            api.DATA_URL = api.get_setting()
            api_models.ElementsImage.images.init(api.DATA_URL)
        except ValidationError as e:
            print e
Code example #29
File: async.py Project: StanfordBioinformatics/loom
def cleanup_task_attempt(task_attempt_uuid):
    from api.models.tasks import TaskAttempt
    if get_setting('TEST_NO_TASK_ATTEMPT_CLEANUP'):
        return
    task_attempt = TaskAttempt.objects.get(uuid=task_attempt_uuid)
    _run_cleanup_task_attempt_playbook(task_attempt)
    task_attempt.add_event('Cleaned up',
                           is_error=False)
    task_attempt.setattrs_and_save_with_retries({
        'status_is_cleaned_up': True })
Code example #30
File: async.py Project: StanfordBioinformatics/loom
def delete_file_resource(file_resource_id):
    from api.models import FileResource
    from loomengine_utils.file_utils import File
    file_resource = FileResource.objects.get(id=file_resource_id)
    file_resource.setattrs_and_save_with_retries({'upload_status': 'deleting'})

    if not file_resource.link:
        # Replace start of URL with path inside Docker container.
        file_url = file_resource.file_url
        if file_url.startswith('file:///'):
            file_url = re.sub(
                '^'+get_setting('STORAGE_ROOT_WITH_PREFIX'),
                get_setting('INTERNAL_STORAGE_ROOT_WITH_PREFIX'),
                file_url)

        file = File(file_url, get_storage_settings(), retry=True)
        file.delete(pruneto=get_setting('INTERNAL_STORAGE_ROOT'))

    file_resource.delete()
Code example #31
File: views.py Project: StanfordBioinformatics/loom
def info(request):
    if request.user.is_authenticated():
        username = request.user.username
    else:
        username = None
    data = {
        'version': version.version(),
        'username': username,
        'login_required': get_setting('LOGIN_REQUIRED'),
    }
    return JsonResponse(data, status=200)
Code example #32
def info(request):
    if request.user.is_authenticated():
        username = request.user.username
    else:
        username = None
    data = {
        'version': version.version(),
        'username': username,
        'login_required': get_setting('LOGIN_REQUIRED'),
    }
    return JsonResponse(data, status=200)
Code example #33
File: runs.py Project: StanfordBioinformatics/loom
    def _get_notification_context(self):
        context = {
            'server_name': get_setting('SERVER_NAME')}
        request = self._serializer_context.get('request')
        if request:
            context.update({
                'server_url': '%s://%s' % (
                    request.scheme, request.get_host()),
            })
        return context
Code example #34
File: async.py Project: StanfordBioinformatics/loom
def execute(task_function, *args, **kwargs):
    """Run a task asynchronously
    """

    if get_setting('TEST_DISABLE_ASYNC_DELAY'):
        # Delay disabled, run synchronously
        logger.debug('Running function "%s" synchronously because '\
                     'TEST_DISABLE_ASYNC_DELAY is True'
                     % task_function.__name__)
        return task_function(*args, **kwargs)

    db.connections.close_all()
    task_function.delay(*args, **kwargs)
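Callers pass the task function and its arguments directly, as in the TaskAttempt.cleanup example shown earlier on this page:

async.execute(async.cleanup_task_attempt, self.uuid)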
Code example #35
File: async.py Project: StanfordBioinformatics/loom
def execute_with_delay(task_function, *args, **kwargs):
    """Run a task asynchronously after at least delay_seconds
    """
    delay = kwargs.pop('delay', 0)
    if get_setting('TEST_DISABLE_ASYNC_DELAY'):
        # Delay disabled, run synchronously
        logger.debug('Running function "%s" synchronously because '\
                     'TEST_DISABLE_ASYNC_DELAY is True'
                     % task_function.__name__)
        return task_function(*args, **kwargs)

    db.connections.close_all()
    task_function.apply_async(args=args, kwargs=kwargs, countdown=delay)
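Since delay is popped from kwargs before dispatch, callers pass it alongside the task's own arguments. A hypothetical call, using the send_notifications task that appears later on this page:

# Hypothetical: send run notifications after at least 30 seconds.
execute_with_delay(send_notifications, run_uuid, delay=30)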
Code example #36
File: runs.py Project: afcarl/loom
 def push(self, channel, data_path):
     """Called when new data is available at the given data_path 
     on the given channel. This will trigger creation of new tasks if 1)
     other input data for those tasks is available, and 2) the task with
     that data_path was not already created previously.
     """
     if get_setting('TEST_NO_CREATE_TASK'):
         return
     if not self.is_leaf:
         return
     for input_set in InputCalculator(self.inputs.all(), channel, data_path)\
         .get_input_sets():
         self._push_input_set(input_set)
Code example #37
File: async.py Project: StanfordBioinformatics/loom
def send_notifications(run_uuid):
    from api.models import Run
    run = Run.objects.get(uuid=run_uuid)
    context = run.notification_context
    if not context:
        context = {}
    server_url = context.get('server_url')
    context.update({
        'run_url': '%s/#/runs/%s/' % (server_url, run.uuid),
        'run_api_url': '%s/api/runs/%s/' % (server_url, run.uuid),
        'run_status': run.status,
        'run_name_and_id': '%s@%s' % (run.name, run.uuid[0:8])
    })
    notification_addresses = []
    if run.notification_addresses:
        notification_addresses = run.notification_addresses
    if get_setting('NOTIFICATION_ADDRESSES'):
        notification_addresses = notification_addresses\
                                 + get_setting('NOTIFICATION_ADDRESSES')
    email_addresses = filter(lambda x: '@' in x, notification_addresses)
    urls = filter(lambda x: '@' not in x, notification_addresses)
    run._send_email_notifications(email_addresses, context)
    run._send_http_notifications(urls, context)
Code example #38
File: views.py Project: StanfordBioinformatics/loom
 def get_task_monitor_settings(self, request, uuid=None):
     task_attempt = self._get_task_attempt(request, uuid)
     return JsonResponse({
         'SERVER_NAME': get_setting('SERVER_NAME'),
         'DEBUG': get_setting('DEBUG'),
         'WORKING_DIR_ROOT': os.path.join(
             get_setting('INTERNAL_STORAGE_ROOT'), 'tmp', task_attempt.uuid),
         'DEFAULT_DOCKER_REGISTRY': get_setting('DEFAULT_DOCKER_REGISTRY'),
          'PRESERVE_ALL': get_setting('PRESERVE_ALL'),
         'PRESERVE_ON_FAILURE': get_setting('PRESERVE_ON_FAILURE'),
         'HEARTBEAT_INTERVAL_SECONDS':
         get_setting('TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'),
         # container name is duplicated in TaskAttempt cleanup playbook
         'PROCESS_CONTAINER_NAME': '%s-attempt-%s' % (
             get_setting('SERVER_NAME'), uuid),
     }, status=200)
Code example #39
def _run_with_delay(task_function, args, kwargs):
    """Run a task asynchronously
    """

    if get_setting('TEST_DISABLE_ASYNC_DELAY'):
        # Delay disabled, run synchronously
        logger.debug('Running function "%s" synchronously because '\
                     'TEST_DISABLE_ASYNC_DELAY is True'
                     % task_function.__name__)
        return task_function(*args, **kwargs)

    db.connections.close_all()
    time.sleep(0.0001)  # Release the GIL
    task_function.delay(*args, **kwargs)
Code example #40
def _run_task(task_uuid, delay=0):
    time.sleep(delay)
    # If task has been run before, old TaskAttempt will be rendered inactive
    from api.models.tasks import Task
    task = Task.objects.get(uuid=task_uuid)
    # Do not run again if already running
    if task.task_attempt and not task.is_unresponsive():
        return
    task_attempt = task.create_and_activate_attempt()
    if get_setting('TEST_NO_RUN_TASK_ATTEMPT'):
        logger.debug('Skipping async._run_execute_task_attempt_playbook '
                     'because TEST_NO_RUN_TASK_ATTEMPT is True')
        return
    _run_with_heartbeats(_run_execute_task_attempt_playbook,
                         task_attempt,
                         args=[task_attempt])
Code example #41
File: runs.py Project: afcarl/loom
 def _send_http_notifications(self, urls, context):
     if not urls:
         return
     any_failures = False
     try:
         data = {
             'message': 'Loom run %s is %s' % (
                 context['run_name_and_id'], context['run_status']),
             'run_uuid': self.uuid,
             'run_name': self.name,
             'run_status': self.status,
             'run_url': context['run_url'],
             'run_api_url': context['run_api_url'],
             'server_name': context['server_name'],
             'server_url': context['server_url'],
         }
     except Exception as e:
         self.add_event("Http notification failed",
                        detail=str(e),
                        is_error=True)
         raise
     for url in urls:
         try:
             response = requests.post(
                 url,
                 json=data,
                 verify=get_setting(
                     'NOTIFICATION_HTTPS_VERIFY_CERTIFICATE'))
             response.raise_for_status()
         except Exception as e:
             self.add_event("Http notification failed",
                            detail=str(e),
                            is_error=True)
             any_failures = True
     if not any_failures:
         self.add_event("Http notification succeeded",
                        detail=', '.join(urls),
                        is_error=False)
Code example #42
    def run_with_heartbeats(self):
        heartbeat_interval = int(get_setting(
            'TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'))
        # Polling interval should never exceed the heartbeat interval
        polling_interval = min(1, heartbeat_interval)

        t = threading.Thread(target=_run_execute_task_attempt_playbook,
                             args=[self,],
                             kwargs=None)
        t.start()

        last_heartbeat = self.last_heartbeat

        while t.is_alive():
            # Beat if (heartbeat_interval - polling_interval) has elapsed,
            # to ensure that we never exceed heartbeat_interval between beats.
            if (datetime.utcnow().replace(tzinfo=pytz.utc) - last_heartbeat)\
               .total_seconds() > (heartbeat_interval - polling_interval):
                last_heartbeat = self.heartbeat()
            time.sleep(polling_interval)
Code example #43
def _run_with_heartbeats(function, task_attempt, args=None, kwargs=None):
    from api.models.tasks import TaskAttempt
    heartbeat_interval = int(
        get_setting('TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'))
    polling_interval = 1
    if polling_interval > heartbeat_interval:
        raise Exception(
            'TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS cannot be less than '\
            'polling interval "%s"' % polling_interval)

    t = threading.Thread(target=function, args=args, kwargs=kwargs)
    t.start()

    last_heartbeat = task_attempt.last_heartbeat

    while t.is_alive():
        # Beat if (heartbeat_interval - polling_interval) has elapsed,
        # to ensure that we never exceed heartbeat_interval between beats.
        if (datetime.utcnow().replace(tzinfo=pytz.utc) - last_heartbeat)\
           .total_seconds() > (heartbeat_interval - polling_interval):
            last_heartbeat = task_attempt.heartbeat()
        time.sleep(polling_interval)
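task_attempt.heartbeat() is not defined in these excerpts; since its return value replaces last_heartbeat, it presumably stamps the attempt with the current time and returns it. A hypothetical sketch, built from helpers that do appear elsewhere on this page:

def heartbeat(self):
    # Hypothetical sketch inferred from usage; not the project's code.
    now = timezone.now()
    self.setattrs_and_save_with_retries({'last_heartbeat': now})
    return now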
Code example #44
 def get_task_monitor_settings(self, request, uuid=None):
     task_attempt = self._get_task_attempt(request, uuid)
      return JsonResponse(
          {
              'SERVER_NAME': get_setting('SERVER_NAME'),
              'DEBUG': get_setting('DEBUG'),
              'WORKING_DIR': task_attempt.get_working_dir(),
              'STDOUT_LOG_FILE': task_attempt.get_stdout_log_file(),
              'STDERR_LOG_FILE': task_attempt.get_stderr_log_file(),
              'DEFAULT_DOCKER_REGISTRY': get_setting('DEFAULT_DOCKER_REGISTRY'),
              'PRESERVE_ALL': get_setting('PRESERVE_ALL'),
              'PRESERVE_ON_FAILURE': get_setting('PRESERVE_ON_FAILURE'),
              'HEARTBEAT_INTERVAL_SECONDS':
                  get_setting('TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'),
          },
          status=200)
Code example #45
def _run_execute_task_attempt_playbook(task_attempt):
    from django.contrib.auth.models import User
    from django.db import IntegrityError
    from rest_framework.authtoken.models import Token

    if get_setting('LOGIN_REQUIRED'):
        try:
            loom_user = User.objects.create(username='******')
        except IntegrityError:
            loom_user = User.objects.get(username='******')
        try:
            token = Token.objects.get(user=loom_user).key
        except Token.DoesNotExist:
            token = Token.objects.create(user=loom_user).key
    else:
        token = None

    env = copy.copy(os.environ)
    playbook = os.path.join(
        get_setting('PLAYBOOK_PATH'),
        get_setting('RUN_TASK_ATTEMPT_PLAYBOOK'))
    cmd_list = ['ansible-playbook',
                '-i', get_setting('ANSIBLE_INVENTORY'),
                playbook,
                # Without this, ansible uses /usr/bin/python,
                # which may be missing needed modules
                '-e', 'ansible_python_interpreter="/usr/bin/env python"',
    ]

    if get_setting('DEBUG'):
        cmd_list.append('-vvvv')

    if task_attempt.resources:
        disk_size = str(task_attempt.resources.get('disk_size', ''))
        cores = str(task_attempt.resources.get('cores', ''))
        memory = str(task_attempt.resources.get('memory', ''))
    else:
        disk_size = ''
        cores = ''
        memory = ''
    docker_image = task_attempt.environment.get(
        'docker_image')
    new_vars = {'LOOM_TASK_ATTEMPT_ID': str(task_attempt.uuid),
                'LOOM_TASK_ATTEMPT_DOCKER_IMAGE': docker_image,
                'LOOM_TASK_ATTEMPT_STEP_NAME': task_attempt.name,
    }
    if token:
        new_vars['LOOM_TOKEN'] = token
    if cores:
        new_vars['LOOM_TASK_ATTEMPT_CORES'] = cores
    if disk_size:
        new_vars['LOOM_TASK_ATTEMPT_DISK_SIZE_GB'] = disk_size
    if memory:
        new_vars['LOOM_TASK_ATTEMPT_MEMORY'] = memory

    env.update(new_vars)

    try:
        p = subprocess.Popen(cmd_list,
                             env=env,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
    except Exception as e:
        logger.error(str(e))
        task_attempt.system_error(detail=str(e))
        return

    terminal_output = ''
    for line in iter(p.stdout.readline, ''):
        terminal_output += line
        print line.strip()
    p.wait()
    if p.returncode != 0:
        logger.error('_run_execute_task_attempt_playbook failed for '\
                     'task_attempt.uuid="%s" with returncode="%s".'
                     % (task_attempt.uuid, p.returncode))
        msg = "Failed to launch worker process for TaskAttempt %s" \
              % task_attempt.uuid
        task_attempt.system_error(detail=terminal_output)
Code example #46
File: task_attempts.py Project: afcarl/loom
 def get_log_dir(self):
     return os.path.join(get_setting('FILE_ROOT_FOR_WORKER'),
                         'runtime_volumes', str(self.uuid), 'logs')
Code example #47
def _run_execute_task_attempt_playbook(task_attempt):
    from django.contrib.auth.models import User
    from django.db import IntegrityError
    from rest_framework.authtoken.models import Token

    if get_setting('LOGIN_REQUIRED'):
        try:
            loom_user = User.objects.create(username='******')
        except IntegrityError:
            loom_user = User.objects.get(username='******')
        try:
            token = Token.objects.get(user=loom_user).key
        except Token.DoesNotExist:
            token = Token.objects.create(user=loom_user).key
    else:
        token = None

    env = copy.copy(os.environ)
    playbook = os.path.join(get_setting('PLAYBOOK_PATH'),
                            get_setting('RUN_TASK_ATTEMPT_PLAYBOOK'))
    cmd_list = [
        'ansible-playbook',
        '-i',
        get_setting('ANSIBLE_INVENTORY'),
        playbook,
        # Without this, ansible uses /usr/bin/python,
        # which may be missing needed modules
        '-e',
        'ansible_python_interpreter="/usr/bin/env python"',
    ]

    if get_setting('DEBUG'):
        cmd_list.append('-vvvv')

    resources = task_attempt.task.run.template.resources
    if resources:
        disk_size = str(resources.get('disk_size', ''))
        cores = str(resources.get('cores', ''))
        memory = str(resources.get('memory', ''))
    else:
        disk_size = ''
        cores = ''
        memory = ''
    docker_image = task_attempt.task.run.template.environment.get(
        'docker_image')
    name = task_attempt.task.run.name

    new_vars = {
        'LOOM_TASK_ATTEMPT_ID': str(task_attempt.uuid),
        'LOOM_TASK_ATTEMPT_DOCKER_IMAGE': docker_image,
        'LOOM_TASK_ATTEMPT_STEP_NAME': name,
    }
    if token:
        new_vars['LOOM_TOKEN'] = token
    if cores:
        new_vars['LOOM_TASK_ATTEMPT_CORES'] = cores
    if disk_size:
        new_vars['LOOM_TASK_ATTEMPT_DISK_SIZE_GB'] = disk_size
    if memory:
        new_vars['LOOM_TASK_ATTEMPT_MEMORY'] = memory

    env.update(new_vars)

    p = subprocess.Popen(cmd_list,
                         env=env,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    terminal_output = ''
    for line in iter(p.stdout.readline, ''):
        terminal_output += line
        print line.strip()
    p.wait()
    if p.returncode != 0:
        logger.error('async._run_execute_task_attempt_playbook failed for '\
                     'task_attempt.uuid="%s" with returncode="%s".'
                     % (task_attempt.uuid, p.returncode))
        msg = "Failed to launch worker process for TaskAttempt %s" \
              % task_attempt.uuid
        task_attempt.add_event(msg, detail=terminal_output, is_error=True)
        task_attempt.fail(detail="Failed to launch worker process")
Code example #48
def postprocess_run(*args, **kwargs):
    if get_setting('TEST_NO_POSTPROCESS'):
        logger.debug('Skipping async._postprocess_run because '\
                     'TEST_NO_POSTPROCESS is True')
        return
    return _run_with_delay(_postprocess_run, args, kwargs)
Code example #49
 def analysis_error(self, detail=''):
     self._process_error(
         detail, get_setting('MAXIMUM_RETRIES_FOR_ANALYSIS_FAILURE'),
         'analysis_failure_count', 'Analysis error')
     raise Exception(detail)
Code example #50
 def is_responsive(self):
     heartbeat = int(get_setting('TASKRUNNER_HEARTBEAT_INTERVAL_SECONDS'))
     timeout = int(get_setting('TASKRUNNER_HEARTBEAT_TIMEOUT_SECONDS'))
     return (timezone.now() - self.last_heartbeat).total_seconds() < timeout
Code example #51
File: async.py Project: StanfordBioinformatics/loom
def execute_with_delay(task_function, *args, **kwargs):
    """Run a task asynchronously after at least delay_seconds
    """
    delay = kwargs.pop('delay', 0)
    if get_setting('TEST_DISABLE_ASYNC_DELAY'):
        # Delay disabled, run synchronously
        logger.debug('Running function "%s" synchronously because '\
                     'TEST_DISABLE_ASYNC_DELAY is True'
                     % task_function.__name__)
        return task_function(*args, **kwargs)

    db.connections.close_all()
    task_function.apply_async(args=args, kwargs=kwargs, countdown=delay)

SYSTEM_CHECK_INTERVAL_MINUTES = get_setting('SYSTEM_CHECK_INTERVAL_MINUTES')

@periodic_task(run_every=timedelta(minutes=SYSTEM_CHECK_INTERVAL_MINUTES))
def check_for_stalled_tasks():
    """Check for tasks that are no longer sending a heartbeat
    """
    from api.models.tasks import Task
    for task in Task.objects.filter(status_is_running=True):
        if not task.is_responsive():
            task.system_error()
        if task.is_timed_out():
            task.timeout_error()

@periodic_task(run_every=timedelta(minutes=SYSTEM_CHECK_INTERVAL_MINUTES))
def check_for_missed_cleanup():
    """Check for TaskAttempts that were never cleaned up
Code example #52
def kill_task_attempt(*args, **kwargs):
    return _run_with_delay(_kill_task_attempt, args, kwargs)


@shared_task
def _send_run_notifications(run_uuid):
    from api.models.runs import Run
    run = Run.objects.get(uuid=run_uuid)
    run.send_notifications()


def send_run_notifications(*args, **kwargs):
    return _run_with_delay(_send_run_notifications, args, kwargs)


SYSTEM_CHECK_INTERVAL_MINUTES = get_setting('SYSTEM_CHECK_INTERVAL_MINUTES')


@periodic_task(run_every=timedelta(minutes=SYSTEM_CHECK_INTERVAL_MINUTES))
def check_for_stalled_tasks():
    """Check for tasks that are no longer sending a heartbeat
    """
    from api.models.tasks import Task
    for task in Task.objects.filter(status_is_running=True):
        if task.is_unresponsive():
            task.system_error()


@periodic_task(run_every=timedelta(minutes=SYSTEM_CHECK_INTERVAL_MINUTES))
def check_for_missed_cleanup():
    """Check for TaskAttempts that were never cleaned up
Code example #53
 def get_file_root(cls):
     file_root = get_setting('STORAGE_ROOT')
     assert file_root.startswith('/'), \
         'STORAGE_ROOT should be an absolute path, but it is "%s".' \
         % file_root
     return cls._add_url_prefix(file_root)
Code example #54
File: urls.py Project: StanfordBioinformatics/loom
                api.views.RunTagViewSet,
                base_name='run-tag')
router.register('run-labels',
                api.views.RunLabelViewSet,
                base_name='run-label')
router.register('users',
                api.views.UserViewSet,
                base_name='user')

urlpatterns = [
    url(r'^', include(router.urls)),
    url(r'^status/$', api.views.status),
    url(r'^info/$', api.views.info),
    url(r'^auth-status/$', api.views.auth_status),
    url(r'^storage-settings/$', api.views.StorageSettingsView.as_view()),
    url(r'^doc/$', get_swagger_view(title='Loom API')),
]

if get_setting('LOGIN_REQUIRED'):
    urlpatterns.extend([
        url(r'^auth/$', api.views.AuthView.as_view()),
        url(r'^tokens/$', api.views.TokenView.as_view()),
    ])

if settings.DEBUG:
    # This view is for testing response to a server error, e.g. where
    # server errors are logged.
    urlpatterns.extend([
        url('^error/$', api.views.raise_server_error),
    ])
Code example #55
File: data_objects.py Project: yqwu1983/loom
 def _get_file_root(cls):
     file_root = get_setting('STORAGE_ROOT')
     assert file_root.startswith('/'), \
         'STORAGE_ROOT should be an absolute path, but it is "%s".' \
         % file_root
     return file_root
Code example #56
 def retrieve(self, request):
     return JsonResponse({
         'GCE_PROJECT': get_setting('GCE_PROJECT'),
     })
Code example #57
File: urls.py Project: yqwu1983/loom
                base_name='template-tag')
router.register('template-labels',
                api.views.TemplateLabelViewSet,
                base_name='template-label')
router.register('run-tags', api.views.RunTagViewSet, base_name='run-tag')
router.register('run-labels', api.views.RunLabelViewSet, base_name='run-label')
router.register('users', api.views.UserViewSet, base_name='user')

urlpatterns = [
    url(r'^', include(router.urls)),
    url(r'^status/$', api.views.status),
    url(r'^info/$', api.views.info),
    url(r'^auth-status/$', api.views.auth_status),
    url(r'^filemanager-settings/$',
        api.views.FileManagerSettingsView.as_view()),
    url(r'^doc/$', get_swagger_view(title='Loom API')),
]

if get_setting('LOGIN_REQUIRED'):
    urlpatterns.extend([
        url(r'^auth/$', api.views.AuthView.as_view()),
        url(r'^tokens/$', api.views.TokenView.as_view()),
    ])

if settings.DEBUG:
    # This view is for testing response to a server error, e.g. where
    # server errors are logged.
    urlpatterns.extend([
        url('^error/$', api.views.raise_server_error),
    ])