def refresh_user_cache():
    logger.info("Refreshing user cache")
    redis = RedisClient.get()
    for user in list(User.objects.all().exclude(profile__isnull=True)):
        bundle = get_user_bundle(user)
        redis.set(f"users/{user.username}", json.dumps(bundle))
    redis.set("users_updated", timezone.now().timestamp())
def get_workflow_usage_timeseries(owner: str,
                                  name: str,
                                  branch: str,
                                  invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"workflow_timeseries/{owner}/{name}/{branch}")

    if cached is None or invalidate:
        series = dict()
        tasks = Task.objects.filter(workflow__repo__owner=owner,
                                    workflow__repo__name=name,
                                    workflow__repo__branch=branch)

        # return early if no tasks
        if len(tasks) == 0:
            return series

        # count tasks per day (bucketed at midnight)
        for task in tasks:
            timestamp = datetime.combine(task.created.date(),
                                         datetime.min.time()).isoformat()
            series[timestamp] = series.get(timestamp, 0) + 1

        redis.set(f"workflow_timeseries/{owner}/{name}/{branch}",
                  json.dumps(series))
    else:
        series = json.loads(cached)

    return series
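# A minimal, self-contained sketch (not part of the original module) of the
# day-bucketing used in get_workflow_usage_timeseries: each creation time is
# normalized to midnight of its calendar day, so the series maps ISO day
# timestamps to counts. For example:
#   bucket_by_day([datetime(2022, 3, 1, 9, 30), datetime(2022, 3, 1, 17, 5)])
#   -> {'2022-03-01T00:00:00': 2}
def bucket_by_day(timestamps: list) -> dict:
    series = {}
    for ts in timestamps:
        day = datetime.combine(ts.date(), datetime.min.time()).isoformat()
        series[day] = series.get(day, 0) + 1
    return series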
def healthcheck(request, name):
    try:
        agent = Agent.objects.get(name=name)

        # if the requesting user doesn't own the agent and isn't on its
        # list of authorized users, they're not authorized to access it
        if not agent.public and agent.user != request.user and request.user.username not in [
                u.username for u in agent.users_authorized.all()
        ]:
            return HttpResponseNotFound()
    except Agent.DoesNotExist:
        return HttpResponseNotFound()

    healthy, output = is_healthy(agent)
    check = {
        'timestamp': timezone.now().isoformat(),
        'healthy': healthy,
        'output': output
    }

    # persist health status on the agent record
    agent.is_healthy = healthy
    agent.save()

    # update cache
    redis = RedisClient.get()
    length = redis.llen(f"healthchecks/{agent.name}")
    checks_saved = int(settings.AGENTS_HEALTHCHECKS_SAVED)
    if length > checks_saved: redis.rpop(f"healthchecks/{agent.name}")
    redis.lpush(f"healthchecks/{agent.name}", json.dumps(check))
    return JsonResponse(check)
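# A hedged sketch (not part of the original module) of the capped-list idiom
# used for health checks above: pop from the tail before pushing to the head
# so at most `limit` entries are retained. `redis` is assumed to be a redis-py
# client. Note the view above trims with a strict >, which briefly allows
# limit + 1 entries; >= keeps the list at exactly the cap.
def push_capped(redis, key: str, item: str, limit: int):
    if redis.llen(key) >= limit:
        redis.rpop(key)  # evict the oldest entry
    redis.lpush(key, item)  # prepend the newest entry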
def get_user_bundle(user: User):
    profile = Profile.objects.get(user=user)
    if not has_github_info(profile):
        return {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
        }
    else:
        redis = RedisClient.get()
        cached = redis.get(f"users/{user.username}")
        if cached is not None: return json.loads(cached)
        github_profile = async_to_sync(get_user_github_profile)(user)
        github_organizations = async_to_sync(get_user_github_organizations)(
            user)
        bundle = {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
            'github_username': profile.github_username,
            'github_profile': github_profile,
            'github_organizations': github_organizations,
        } if 'login' in github_profile else {
            'username': user.username,
            'first_name': user.first_name,
            'last_name': user.last_name,
        }
        redis.set(f"users/{user.username}", json.dumps(bundle))
        return bundle
def agents_healthchecks():
    task_name = agents_healthchecks.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)"
        )
        return

    try:
        for agent in Agent.objects.all():
            healthy, output = is_healthy(agent)
            agent.is_healthy = healthy
            agent.save()

            redis = RedisClient.get()
            length = redis.llen(f"healthchecks/{agent.name}")
            checks_saved = int(settings.AGENTS_HEALTHCHECKS_SAVED)
            if length > checks_saved: redis.rpop(f"healthchecks/{agent.name}")
            check = {
                'timestamp': timezone.now().isoformat(),
                'healthy': healthy,
                'output': output
            }
            redis.lpush(f"healthchecks/{agent.name}", json.dumps(check))
    finally:
        __release_lock(task_name)
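# __acquire_lock and __release_lock are not defined in this listing. A common
# implementation (an assumption, not necessarily what this module does) is a
# Redis mutex: SET with nx=True so only one worker wins, plus an expiry so a
# crashed worker cannot hold the lock forever.
def __acquire_lock(name: str, timeout: int = 3600) -> bool:
    redis = RedisClient.get()
    # nx=True: set only if the key does not already exist
    return bool(redis.set(f"locks/{name}", 1, nx=True, ex=timeout))

def __release_lock(name: str):
    RedisClient.get().delete(f"locks/{name}")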
async def refresh_user_workflow_cache(github_username: str):
    if github_username is None or github_username == '':
        raise ValueError(f"No GitHub username provided")

    try:
        profile = await sync_to_async(Profile.objects.get
                                      )(github_username=github_username)
        user = await get_profile_user(profile)
    except MultipleObjectsReturned:
        logger.warning(
            f"Multiple users bound to GitHub user {github_username}!")
        return
    except Profile.DoesNotExist:
        logger.warning(f"GitHub user {github_username} does not exist")
        return

    # scrape GitHub to synchronize repos and workflow config
    profile = await sync_to_async(Profile.objects.get)(user=user)
    workflows = await github.list_connectable_repos_by_owner(
        github_username, profile.github_token)

    # update the cache, first removing workflows that no longer exist
    redis = RedisClient.get()
    removed = 0
    updated = 0
    added = 0
    old_keys = [
        key.decode('utf-8')
        for key in redis.scan_iter(match=f"workflows/{github_username}/*")
    ]
    new_keys = [
        f"workflows/{github_username}/{wf['repo']['name']}/{wf['branch']['name']}"
        for wf in workflows
    ]
    for old_key in old_keys:
        if old_key not in new_keys:
            logger.debug(f"Removing user workflow {old_key}")
            removed += 1
            redis.delete(old_key)
        else:
            logger.debug(f"Updating user workflow {old_key}")
            updated += 1

    # ...then adding/updating the workflows we just scraped
    for wf in workflows:
        # set flag if this is a featured workflow
        wf['featured'] = await is_featured(github_username, wf['repo']['name'],
                                           wf['branch']['name'])

        key = f"workflows/{github_username}/{wf['repo']['name']}/{wf['branch']['name']}"
        if key not in old_keys:
            logger.debug(f"Adding user workflow {key}")
            added += 1
        redis.set(key, json.dumps(del_none(wf)))

    redis.set(f"workflows_updated/{github_username}",
              timezone.now().timestamp())
    logger.info(
        f"{len(workflows)} workflow(s) now in GitHub user {github_username}'s workflow cache (added {added}, updated {updated}, removed {removed})"
    )
def get_institutions(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = list(redis.scan_iter(match=f"institutions/*"))
    institutions = dict()

    if invalidate:
        # count members per institution
        counts = {
            i['institution'].lower(): i['institution__count']
            for i in count_institutions()
        }

        for k in counts.keys():
            # get institution information (TODO: can we send all the requests concurrently?)
            # TODO: need to make sure this doesn't exceed the free plan rate limit
            result = async_to_sync(mapbox.get_institution)(
                k, settings.MAPBOX_TOKEN)

            # reconstruct institution name with proper capitalization from Mapbox result
            # TODO: are there any edge cases this might fail for?
            name = ' '.join(result['query'])

            # if we can't match the institution name, skip it
            if name not in counts:
                logger.warning(f"Failed to match {name} to any institution")
                continue

            # number of members in this institution
            count = counts[name]

            # if Mapbox returned no results, we can't return geocode information
            if len(result['features']) == 0:
                logger.warning(
                    f"No results from Mapbox for institution: {name}")
                institutions[name] = {
                    'institution': name,
                    'count': count,
                    'geocode': None
                }

            # if we got results, pick the top one
            else:
                feature = result['features'][0]
                feature['id'] = name
                feature['properties'] = {'name': name, 'count': count}
                institutions[name] = {
                    'institution': name,
                    'count': count,
                    'geocode': feature
                }

        for name, institution in institutions.items():
            redis.set(f"institutions/{name}", json.dumps(institution))
    else:
        for key in cached:
            value = redis.get(key)
            if value is not None:
                # strip the "institutions/" prefix so keys match the branch above
                name = key.decode('utf-8').partition('/')[2]
                institutions[name] = json.loads(value)

    return institutions
async def get_workflow(owner: str,
                       name: str,
                       branch: str,
                       github_token: str,
                       cyverse_token: str,
                       invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    updated = redis.get(f"workflows_updated/{owner}")
    workflow = redis.get(f"workflows/{owner}/{name}/{branch}")

    if updated is None or workflow is None or invalidate:
        bundle = await github.get_repo_bundle(owner, name, branch,
                                              github_token, cyverse_token)
        workflow = {
            'config': bundle['config'],
            'repo': bundle['repo'],
            'validation': bundle['validation'],
            'branch': branch,
            'featured': FeaturedWorkflow.objects.filter(
                owner=owner, name=name, branch=branch).exists()
        }
        redis.set(f"workflows/{owner}/{name}/{branch}",
                  json.dumps(del_none(workflow)))
        return workflow
    else:
        return json.loads(workflow)
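# get_workflow above, like get_user_timeseries, get_total_counts, and
# get_aggregate_timeseries below, follows the same cache-aside pattern. A
# hypothetical helper (a sketch, not in the original module) that factors out
# the synchronous variant:
def cache_aside(key: str, compute, invalidate: bool = False):
    redis = RedisClient.get()
    cached = redis.get(key)
    if cached is None or invalidate:
        value = compute()  # cache miss or forced refresh: recompute...
        redis.set(key, json.dumps(value))  # ...and write back
        return value
    return json.loads(cached)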
def refresh_all_users_stats():
    task_name = refresh_all_users_stats.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)"
        )
        return

    try:
        # TODO: move caching to query layer
        redis = RedisClient.get()

        for user in User.objects.all():
            logger.info(f"Computing statistics for {user.username}")

            # overall statistics (no need to save result, just trigger reevaluation)
            async_to_sync(q.get_user_statistics)(user, True)

            # timeseries (no need to save result, just trigger reevaluation)
            q.get_user_timeseries(user, invalidate=True)

        logger.info(f"Computing aggregate statistics")
        redis.set("stats_counts", json.dumps(q.get_total_counts(True)))
        redis.set("total_timeseries",
                  json.dumps(q.get_aggregate_timeseries(True)))
    finally:
        __release_lock(task_name)
def list_project_workflows(project: Investigation) -> List[dict]:
    redis = RedisClient.get()
    proj_dict = project_to_dict(project)
    workflows = [
        redis.get(f"workflows/{name}") for name in proj_dict['workflows']
    ]
    return [json.loads(wf) for wf in workflows if wf is not None]
def list_public_workflows() -> List[dict]:
    redis = RedisClient.get()
    workflows = [
        wf for wf in [
            json.loads(redis.get(key))
            for key in redis.scan_iter(match='workflows/*')
        ] if 'public' in wf['config'] and wf['config']['public']
    ]
    return workflows
def get_project_workflows(project: Investigation):
    redis = RedisClient.get()
    workflows = [
        wf for wf in [
            json.loads(redis.get(key))
            for key in redis.scan_iter(match='workflows/*')
        ] if 'projects' in wf['config']
        and project.guid in wf['config']['projects']
    ]
    return workflows
def refresh_user_institutions():
    task_name = refresh_user_institutions.name
    if not __acquire_lock(task_name):
        logger.warning(
            f"Task '{task_name}' is already running, aborting (maybe consider a longer scheduling interval?)"
        )
        return

    try:
        # TODO: move caching to query layer
        redis = RedisClient.get()
        institutions = q.get_institutions(True)
        for name, institution in institutions.items():
            redis.set(f"institutions/{name}", json.dumps(institution))
    finally:
        __release_lock(task_name)
def get_user_timeseries(user: User, invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"user_timeseries/{user.username}")

    if cached is None or invalidate:
        tasks_usage = get_tasks_usage_timeseries(user=user)
        workflows_usage = get_workflows_usage_timeseries(user)
        agents_usage = get_agents_usage_timeseries(user)
        series = {
            'tasks_usage': tasks_usage,
            'agents_usage': agents_usage,
            'workflows_usage': workflows_usage
        }

        redis.set(f"user_timeseries/{user.username}", json.dumps(series))
    else:
        series = json.loads(cached)

    return series
async def refresh_org_workflow_cache(org_name: str, github_token: str):
    # scrape GitHub to synchronize repos and workflow config
    workflows = await github.list_connectable_repos_by_org(
        org_name, github_token)

    # update the cache, first removing workflows that no longer exist
    redis = RedisClient.get()
    removed = 0
    updated = 0
    added = 0
    old_keys = [
        key.decode('utf-8')
        for key in redis.scan_iter(match=f"workflows/{org_name}/*")
    ]
    new_keys = [
        f"workflows/{org_name}/{wf['repo']['name']}/{wf['branch']['name']}"
        for wf in workflows
    ]
    for old_key in old_keys:
        if old_key not in new_keys:
            logger.debug(f"Removing org workflow {old_key}")
            removed += 1
            redis.delete(old_key)
        else:
            logger.debug(f"Updating org workflow {old_key}")
            updated += 1

    # ...then adding/updating the workflows we just scraped
    for wf in workflows:
        # set flag if this is a featured workflow
        wf['featured'] = await is_featured(org_name, wf['repo']['name'],
                                           wf['branch']['name'])

        key = f"workflows/{org_name}/{wf['repo']['name']}/{wf['branch']['name']}"
        if key not in old_keys:
            logger.debug(f"Adding org workflow {key}")
            added += 1
        redis.set(key, json.dumps(del_none(wf)))

    redis.set(f"workflows_updated/{org_name}", timezone.now().timestamp())
    logger.info(
        f"{len(workflows)} workflow(s) now in GitHub organization {org_name}'s workflow cache (added {added}, updated {updated}, removed {removed})"
    )
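# refresh_user_workflow_cache and refresh_org_workflow_cache share their
# cache-diff logic verbatim. A hypothetical helper (a sketch, not in the
# original module) could compute the same removed/updated/added partition
# with set operations:
def diff_workflow_keys(redis, owner: str, workflows: list):
    old_keys = {
        key.decode('utf-8')
        for key in redis.scan_iter(match=f"workflows/{owner}/*")
    }
    new_keys = {
        f"workflows/{owner}/{wf['repo']['name']}/{wf['branch']['name']}"
        for wf in workflows
    }
    # (keys to remove, keys to update, keys to add)
    return old_keys - new_keys, old_keys & new_keys, new_keys - old_keys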
def healthchecks(request, name):
    try:
        agent = Agent.objects.get(name=name)

        # if the requesting user doesn't own the agent and isn't on its
        # list of authorized users, they're not authorized to access it
        if not agent.public and agent.user != request.user and request.user.username not in [
                u.username for u in agent.users_authorized.all()
        ]:
            return HttpResponseNotFound()
    except Agent.DoesNotExist:
        return HttpResponseNotFound()

    redis = RedisClient.get()
    checks = [
        json.loads(check)
        for check in redis.lrange(f"healthchecks/{agent.name}", 0, -1)
    ]
    return JsonResponse({'healthchecks': checks})
def get_total_counts(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get("stats_counts")

    if cached is None or invalidate:
        users = User.objects.count()
        # TODO: store online status in the DB each time the user logs in
        online = len(filter_online(User.objects.all()))
        wfs = [
            json.loads(redis.get(key))
            for key in redis.scan_iter('workflows/*')
        ]
        devs = list(set([wf['repo']['owner']['login'] for wf in wfs]))
        workflows = len(wfs)
        developers = len(devs)
        agents = Agent.objects.count()
        tasks = TaskCounter.load().count
        running = len(
            list(
                Task.objects.exclude(status__in=[
                    TaskStatus.COMPLETED, TaskStatus.FAILURE,
                    TaskStatus.TIMEOUT, TaskStatus.CANCELED
                ])))
        counts = {
            'users': users,
            'online': online,
            'workflows': workflows,
            'developers': developers,
            'agents': agents,
            'tasks': tasks,
            'running': running
        }

        redis.set("stats_counts", json.dumps(counts))
    else:
        counts = json.loads(cached)

    return counts
def get_aggregate_timeseries(invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get("total_timeseries")

    if cached is None or invalidate:
        users_total = get_users_total_timeseries()
        tasks_total = get_tasks_total_timeseries()
        tasks_usage = get_tasks_usage_timeseries()
        workflows_usage = get_workflows_usage_timeseries()
        agents_usage = get_agents_usage_timeseries()
        series = {
            'users_total': users_total,
            'tasks_total': tasks_total,
            'tasks_usage': tasks_usage,
            'agents_usage': agents_usage,
            'workflows_usage': workflows_usage,
        }

        redis.set("total_timeseries", json.dumps(series))
    else:
        series = json.loads(cached)

    return series
def list_users(invalidate: bool = False) -> List[dict]:
    redis = RedisClient.get()
    updated = redis.get(f"users_updated")

    # repopulate if empty or invalidation requested
    if updated is None or len(list(
            redis.scan_iter(match="users/*"))) == 0 or invalidate:
        refresh_user_cache()
    else:
        age = (datetime.now() - datetime.fromtimestamp(float(updated)))
        age_secs = age.total_seconds()
        max_secs = (int(settings.USERS_REFRESH_MINUTES) * 60)

        # otherwise only if stale
        if age_secs > max_secs:
            logger.info(
                f"User cache is stale ({age_secs}s old, {age_secs - max_secs}s past limit), repopulating"
            )
            refresh_user_cache()

    return [
        json.loads(redis.get(key)) for key in redis.scan_iter(match="users/*")
    ]
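# A small sketch (not part of the original module) of the staleness test in
# list_users: the cache records a POSIX timestamp under users_updated, and
# its age in seconds is compared to the configured refresh interval.
def is_stale(updated_timestamp: float, refresh_minutes: int) -> bool:
    age = datetime.now() - datetime.fromtimestamp(updated_timestamp)
    return age.total_seconds() > refresh_minutes * 60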
def task_to_dict(task: Task) -> dict:
    orchestrator_log_file_path = get_task_orchestrator_log_file_path(task)
    if Path(orchestrator_log_file_path).is_file():
        with open(orchestrator_log_file_path, 'r') as log:
            orchestrator_logs = [
                line.strip() for line in log.readlines()[-1000000:]
            ]
    else:
        orchestrator_logs = []

    # try:
    #     AgentAccessPolicy.objects.get(user=task.user, agent=task.agent, role__in=[AgentRole.admin, AgentRole.guest])
    #     can_restart = True
    # except:
    #     can_restart = False

    results = RedisClient.get().get(f"results/{task.guid}")

    return {
        # 'can_restart': can_restart,
        'guid': task.guid,
        'status': task.status,
        'owner': task.user.username,
        'name': task.name,
        'project': {
            'title': task.project.title,
            'owner': task.project.owner.username,
            'description': task.project.description
        } if task.project is not None else None,
        'study': {
            'title': task.study.title,
            'description': task.study.description
        } if task.study is not None else None,
        'work_dir': task.workdir,
        'orchestrator_logs': orchestrator_logs,
        'inputs_detected': task.inputs_detected,
        'inputs_downloaded': task.inputs_downloaded,
        'inputs_submitted': task.inputs_submitted,
        'inputs_completed': task.inputs_completed,
        'agent': agent_to_dict(task.agent) if task.agent is not None else None,
        'created': task.created.isoformat(),
        'updated': task.updated.isoformat(),
        'completed': task.completed.isoformat() if task.completed is not None else None,
        'due_time': None if task.due_time is None else task.due_time.isoformat(),
        'cleanup_time': None if task.cleanup_time is None else task.cleanup_time.isoformat(),
        'workflow_owner': task.workflow_owner,
        'workflow_name': task.workflow_name,
        'workflow_branch': task.workflow_branch,
        'workflow_image_url': task.workflow_image_url,
        'input_path': task.workflow['input']['path'] if 'input' in task.workflow else None,
        'output_path': task.workflow['output']['to']
        if ('output' in task.workflow and 'to' in task.workflow['output']) else None,
        'tags': [str(tag) for tag in task.tags.all()],
        'is_complete': task.is_complete,
        'is_success': task.is_success,
        'is_failure': task.is_failure,
        'is_cancelled': task.is_cancelled,
        'is_timeout': task.is_timeout,
        'result_previews_loaded': task.previews_loaded,
        'result_transfer': has_output_target(task),
        'results_retrieved': task.results_retrieved,
        'results_transferred': task.results_transferred,
        'cleaned_up': task.cleaned_up,
        'transferred': task.transferred,
        'transfer_path': task.transfer_path,
        'output_files': json.loads(results) if results is not None else [],
        'job_id': task.job_id,
        'job_status': task.job_status,
        'job_walltime': task.job_consumed_walltime,
        'delayed_id': task.delayed_id,
        'repeating_id': task.repeating_id
    }
def test_push(self, guid: str):
    if guid is None:
        logger.warning("Aborting test_push: task GUID is required")
        self.request.callbacks = None
        return

    try:
        task = Task.objects.get(guid=guid)
    except Task.DoesNotExist:
        logger.warning(
            f"Could not find task with GUID {guid} (might have been deleted?)")
        return

    try:
        # check the expected filenames against the contents of the CyVerse collection
        path = task.workflow['output']['to']
        actual = [
            file.rpartition('/')[2] for file in terrain.list_dir(
                path, task.user.profile.cyverse_access_token)
        ]
        expected = [
            file['name'] for file in json.loads(RedisClient.get().get(
                f"results/{task.guid}")) if file['exists']
        ]
        logger.debug(f"Expected results: {expected}")
        logger.debug(f"Actual results: {actual}")

        if not set(expected).issubset(set(actual)):
            message = f"Transfer to CyVerse directory {path} incomplete: expected {len(expected)} files but found {len(actual)}"
            logger.warning(message)

            # mark the task failed
            now = timezone.now()
            task.updated = now
            task.completed = now
            task.status = TaskStatus.FAILURE
            task.transferred = True
            task.results_transferred = len(expected)
            task.transfer_path = path
            task.save()
        else:
            message = f"Transfer to CyVerse directory {path} completed"
            logger.info(message)

            # mark the task succeeded
            now = timezone.now()
            task.updated = now
            task.completed = now
            task.status = TaskStatus.COMPLETED if task.status != TaskStatus.FAILURE else task.status
            task.transferred = True
            task.results_transferred = len(expected)
            task.transfer_path = path
            task.save()

        # log status update and push it to clients
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        return guid
    except Exception:
        self.request.callbacks = None
        message = f"Failed to test CyVerse transfer: {traceback.format_exc()}"

        # mark the task failed and persist it
        task.status = TaskStatus.FAILURE
        now = timezone.now()
        task.updated = now
        task.completed = now
        task.save()

        # log status update and push it to client
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        # push AWS SNS notification
        if task.user.profile.push_notification_status == 'enabled':
            SnsClient.get().publish_message(
                task.user.profile.push_notification_topic_arn,
                f"PlantIT task {task.guid}", message, {})

        # revoke access to the user's datasets then clean up the task
        unshare_data.s(task.guid).apply_async()
        tidy_up.s(task.guid).apply_async(
            countdown=int(environ.get('TASKS_CLEANUP_MINUTES')) * 60)
def test_results(self, guid: str):
    if guid is None:
        logger.warning("Aborting test_results: task GUID is required")
        self.request.callbacks = None
        return

    try:
        task = Task.objects.get(guid=guid)
    except Task.DoesNotExist:
        logger.warning(
            f"Could not find task with GUID {guid} (might have been deleted?)")
        self.request.callbacks = None
        return

    try:
        # get logs from agent filesystem
        ssh = get_task_ssh_client(task)
        get_task_remote_logs(task, ssh)

        # get results from agent filesystem, then save them to cache and update the task
        results = list_result_files(task)
        found = [r for r in results if r['exists']]

        redis = RedisClient.get()
        redis.set(f"results/{task.guid}", json.dumps(found))
        task.results_retrieved = True
        task.save()

        # make sure we got the results we expected
        missing = [r for r in results if not r['exists']]
        if len(missing) > 0:
            message = f"Found {len(found)} results, missing {len(missing)}: {', '.join([m['name'] for m in missing])}"
        else:
            message = f"Found {len(found)} results"

        # log status update and push it to client(s)
        log_task_orchestrator_status(task, [message])
        async_to_sync(push_task_channel_event)(task)

        return guid
    except Exception:
        self.request.callbacks = None
        message = f"Failed to check results: {traceback.format_exc()}"

        # mark the task failed and persist it
        task.status = TaskStatus.FAILURE
        now = timezone.now()
        task.updated = now
        task.completed = now
        task.save()

        # log status to file
        log_task_orchestrator_status(task, [message])

        # push status to client(s)
        async_to_sync(push_task_channel_event)(task)

        # push AWS SNS notification
        if task.user.profile.push_notification_status == 'enabled':
            SnsClient.get().publish_message(
                task.user.profile.push_notification_topic_arn,
                f"PlantIT task {task.guid}", message, {})

        # revoke access to the user's datasets then clean up the task
        unshare_data.s(task.guid).apply_async()
        tidy_up.s(task.guid).apply_async(
            countdown=int(environ.get('TASKS_CLEANUP_MINUTES')) * 60)
def list_user_workflows(owner: str) -> List[dict]:
    redis = RedisClient.get()
    return [
        json.loads(redis.get(key))
        for key in redis.scan_iter(match=f"workflows/{owner}/*")
    ]
def get_last_task_config(username, owner, name, branch):
    redis = RedisClient.get()
    last_config = redis.get(
        f"workflow_configs/{username}/{owner}/{name}/{branch}")
    return None if last_config is None else json.loads(last_config)
def list_org_workflows(organization: str) -> List[dict]:
    redis = RedisClient.get()
    return [
        json.loads(redis.get(key))
        for key in redis.scan_iter(match=f"workflows/{organization}/*")
    ]
def create_immediate_task(user: User, config: dict):
    # set submission time so we can persist configuration
    # and show recent submissions to the user in the UI
    config['timestamp'] = timezone.now().isoformat()

    # parse GitHub repo info
    repo_owner = config['repo']['owner']
    repo_name = config['repo']['name']
    repo_branch = config['repo']['branch']

    # persist task configuration
    redis = RedisClient.get()
    redis.set(
        f"workflow_configs/{user.username}/{repo_owner}/{repo_name}/{repo_branch}",
        json.dumps(config))

    # get the task GUID and name
    guid = config.get('guid', None) if config['type'] == 'Now' else str(
        uuid.uuid4())
    name = config.get('name', None)

    # if the browser client hasn't set a GUID, create one
    if guid is None: guid = str(uuid.uuid4())

    # get the agent this task should be submitted on
    agent = Agent.objects.get(name=config['agent'])

    # if we have a time limit, calculate due time
    time = config.get('time', None)
    if time is not None:
        time_limit = parse_task_time_limit(time)
        logger.info(f"Using task time limit {time_limit}s")
        due_time = timezone.now() + timedelta(seconds=time_limit)
    else:
        due_time = None

    # create the task right meow
    now = timezone.now()
    task = Task.objects.create(
        guid=guid,
        name=guid if name is None or name == '' else name,
        user=user,
        workflow=config['workflow'],
        workflow_owner=repo_owner,
        workflow_name=repo_name,
        workflow_branch=repo_branch,
        agent=agent,
        status=TaskStatus.CREATED,
        created=now,
        updated=now,
        due_time=due_time,
        token=binascii.hexlify(os.urandom(20)).decode())

    # add MIAPPE info, if we have any
    project, study = parse_task_miappe_info(config['miappe'])
    if project is not None:
        task.project = Investigation.objects.get(owner=user, title=project)
    if study is not None:
        task.study = Study.objects.get(investigation=task.project, title=study)

    # add repo logo
    if 'logo' in config['workflow']:
        logo_path = config['workflow']['logo']
        task.workflow_image_url = f"https://raw.githubusercontent.com/{repo_owner}/{repo_name}/master/{logo_path}"

    for tag in config['tags']:
        task.tags.add(tag)  # add task tags
    task.workdir = f"{task.guid}/"  # use GUID for working directory name
    task.save()

    # increment task count for aggregate statistics
    counter = TaskCounter.load()
    counter.count = counter.count + 1
    counter.save()

    return task
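# parse_task_time_limit is referenced above but not defined in this listing.
# A plausible sketch, assuming the config's 'time' entry looks like
# {'unit': 'Hours', 'amount': 2} (the shape is an assumption):
def parse_task_time_limit(time: dict) -> int:
    multipliers = {'Seconds': 1, 'Minutes': 60, 'Hours': 3600, 'Days': 86400}
    # convert the configured amount to seconds
    return int(time['amount']) * multipliers[time['unit']]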
    def cyverse_handle_temporary_code(self, request):
        session_state = request.GET.get('session_state', None)
        code = request.GET.get('code', None)

        # a missing session state string or code suggests a misconfigured redirect from the KeyCloak client
        if session_state is None:
            return HttpResponseBadRequest("Missing param: 'session_state'")
        if code is None: return HttpResponseBadRequest("Missing param: 'code'")

        # send the authorization request
        response = requests.post(
            "https://kc.cyverse.org/auth/realms/CyVerse/protocol/openid-connect/token",
            data={
                'grant_type': 'authorization_code',
                'client_id': os.environ.get('CYVERSE_CLIENT_ID'),
                'code': code,
                'redirect_uri': os.environ.get('CYVERSE_REDIRECT_URL')
            },
            auth=HTTPBasicAuth(request.user.username,
                               os.environ.get('CYVERSE_CLIENT_SECRET')))

        # if we have anything other than a 200 the auth request did not succeed
        if response.status_code == 400:
            return HttpResponse('Unauthorized for KeyCloak token endpoint',
                                status=401)
        elif response.status_code != 200:
            return HttpResponse('Bad response from KeyCloak token endpoint',
                                status=500)

        # get the response body
        content = response.json()

        # make sure we have CyVerse access & refresh tokens
        if 'access_token' not in content:
            return HttpResponseBadRequest(
                "Missing param on token response: 'access_token'")
        if 'refresh_token' not in content:
            return HttpResponseBadRequest(
                "Missing param on token response: 'refresh_token'")

        # decode them
        access_token = content['access_token']
        refresh_token = content['refresh_token']
        decoded_access_token = jwt.decode(access_token,
                                          options={
                                              'verify_signature': False,
                                              'verify_aud': False,
                                              'verify_iat': False,
                                              'verify_exp': False,
                                              'verify_iss': False
                                          })
        decoded_refresh_token = jwt.decode(refresh_token,
                                           options={
                                               'verify_signature': False,
                                               'verify_aud': False,
                                               'verify_iat': False,
                                               'verify_exp': False,
                                               'verify_iss': False
                                           })

        # retrieve the user entry (or create if it's their first time logging in)
        user, _ = User.objects.get_or_create(
            username=decoded_access_token['preferred_username'])

        # update the user's personal info
        user.first_name = decoded_access_token['given_name']
        user.last_name = decoded_access_token['family_name']
        user.email = decoded_access_token['email']
        user.save()

        # update the user's profile (CyVerse tokens, etc)
        profile, created = Profile.objects.get_or_create(user=user)
        if created: profile.created = timezone.now()
        profile.cyverse_access_token = access_token
        profile.cyverse_refresh_token = refresh_token
        user.profile = profile
        profile.save()
        user.save()

        # if user's stats haven't been calculated yet, schedule it
        redis = RedisClient.get()
        cached_stats = redis.get(f"stats/{user.username}")
        if cached_stats is None:
            self.logger.info(
                f"No usage statistics for {user.username}. Scheduling refresh..."
            )
            refresh_user_stats.s(user.username).apply_async()

        # log the user into the builtin django backend
        login(request,
              user,
              backend='django.contrib.auth.backends.ModelBackend')

        # open the dashboard
        return redirect(f"/home/")
async def get_user_statistics(user: User, invalidate: bool = False) -> dict:
    redis = RedisClient.get()
    cached = redis.get(f"stats/{user.username}")

    if cached is None or invalidate:
        profile = await sync_to_async(Profile.objects.get)(user=user)
        all_tasks = await filter_tasks(user=user)
        completed_tasks = await filter_tasks(user=user, completed=True)
        total_tasks = len(all_tasks)
        total_time = sum([(task.completed - task.created).total_seconds()
                          for task in completed_tasks])
        total_results = sum([
            len(task.results if task.results is not None else [])
            for task in completed_tasks
        ])
        owned_workflows = [
            f"{workflow['repo']['owner']['login']}/{workflow['name'] if 'name' in workflow else '[unnamed]'}"
            for workflow in list_user_workflows(owner=profile.github_username)
        ] if profile.github_username != '' else []
        used_workflows = [
            f"{task.workflow_owner}/{task.workflow_name}" for task in all_tasks
        ]
        used_workflows_counter = Counter(used_workflows)
        unique_used_workflows = list(np.unique(used_workflows))
        owned_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in await filter_agents(user=user) if agent is not None
        ]
        # NOTE: this duplicates the owned-agents query above; guest agents
        # presumably should be filtered by authorization rather than ownership
        guest_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in await filter_agents(user=user) if agent is not None
        ]
        used_agents = [
            (await sync_to_async(agent_to_dict)(agent, user.username))['name']
            for agent in [
                a for a in [await get_task_agent(task) for task in all_tasks]
                if a is not None
            ]
        ]
        used_projects = [
            (await sync_to_async(project_to_dict)(project)) for project in [
                p
                for p in [await get_task_project(task) for task in all_tasks]
                if p is not None
            ]
        ]
        used_agents_counter = Counter(used_agents)
        used_projects_counter = Counter([
            f"{project['guid']} ({project['title']})"
            for project in used_projects
        ])
        unique_used_agents = list(np.unique(used_agents))

        # owned_datasets = terrain.list_dir(f"/iplant/home/{user.username}", profile.cyverse_access_token)
        # guest_datasets = terrain.list_dir(f"/iplant/home/", profile.cyverse_access_token)
        tasks_running = await sync_to_async(get_tasks_usage_timeseries)(
            600, user)

        stats = {
            'total_tasks': total_tasks,
            'total_task_seconds': total_time,
            'total_task_results': total_results,
            'owned_workflows': owned_workflows,
            'workflow_usage': {
                'values': [
                    used_workflows_counter[workflow]
                    for workflow in unique_used_workflows
                ],
                'labels': unique_used_workflows,
            },
            'agent_usage': {
                'values': [
                    used_agents_counter[agent] for agent in unique_used_agents
                ],
                'labels': unique_used_agents,
            },
            'project_usage': {
                'values': list(dict(used_projects_counter).values()),
                'labels': list(dict(used_projects_counter).keys()),
            },
            'task_status': {
                'values': [
                    1 if task.status == 'success' else 0 for task in all_tasks
                ],
                'labels': [
                    'SUCCESS' if task.status == 'success' else 'FAILURE'
                    for task in all_tasks
                ],
            },
            'owned_agents': owned_agents,
            'guest_agents': guest_agents,
            'institution': profile.institution,
            'tasks_running': tasks_running
        }
        redis.set(f"stats/{user.username}", json.dumps(stats))
    else:
        stats = json.loads(cached)
    return stats