def get_ancestry(user_id):
    """Run the ancestry calculation (GRS step 1) on an API user's genotype.

    :param user_id: the API user id whose converted genotype file is used.
    :returns: whatever ``steps.grs_step_1`` produces for this run.
    """
    with record('tasks.cad.get_ancestry', user_id):
        owner = User.objects.get(api_user_id=user_id)
        run_id = uuid.uuid4().hex
        return steps.grs_step_1(run_id, owner.profile.genotype.converted_file)
def _send_cad_notification(user_id):
    """Notify a user that their CAD risk score is available.

    Delegates to the API-level risk score notification helper.
    """
    with record('tasks.cad._send_cad_notification', user_id):
        matches = models.Condition.objects.filter(
            name__iexact='coronary artery disease')
        send_risk_score_notification(user_id, matches[0].name)
def _get_cad_haplotypes(user_id, chromosome):
    """Determine the known haplotypes within one chromosome (GRS step 2)."""
    with record('tasks.cad._get_cad_haplotypes', user_id):
        owner = User.objects.get(api_user_id=user_id)
        return steps.grs_step_2(
            uuid.uuid4().hex,
            owner.profile.genotype.converted_file,
            user_id,
            PHENOTYPE,
            chromosome,
        )
def _store_results(results, user_id):
    """Persist a user's CAD risk scores and per-population ancestry values.

    :param results: a 3-tuple ``(ancestries, path, scores)`` where
        ``scores`` is a newline-separated string of per-population scores
        and ``ancestries`` is a whitespace-separated string of fractions,
        both ordered per SCORE_RESULTS_ORDER.
    :param user_id: primary key of the local User row.
    """
    with record('tasks.cad._store_results', user_id):
        ancestries, path, scores = results
        user = models.User.objects.get(id=user_id)
        cad = models.Condition.objects.filter(
            name__iexact='coronary artery disease')[0]
        version = steps.get_version()
        for population_name, score in zip(SCORE_RESULTS_ORDER,
                                          scores.split('\n')):
            # Comparison yields the bool directly; the old
            # 'True if ... else False' ternary was redundant.
            featured = population_name == 'custom'
            population = models.Population.objects.filter(
                name__iexact=population_name)[0]
            risk_score = models.RiskScore(user=user, condition=cad,
                                          featured=featured,
                                          population=population,
                                          calculated=True,
                                          value=float(score),
                                          version=version)
            risk_score.save()
        for population_name, per_ancestry in zip(SCORE_RESULTS_ORDER,
                                                 ancestries.split()):
            population = models.Population.objects.filter(
                name__iexact=population_name)[0]
            ancestry = models.Ancestry(user=user, population=population,
                                       value=float(per_ancestry),
                                       version=version)
            ancestry.save()
def _dispatch_series_value_update(metric_status_id):
    """Re-dispatch a metric status to the task registered for its metric."""
    with record('tasks.lifestyle._dispatch_series_value_update'):
        metric_status = models.LifestyleMetricStatus.objects.get(
            id=metric_status_id)
        identifier = metric_status.metric.identifier
        METRIC_TASKS_BY_IDENTIFIER[identifier].delay(metric_status_id)
def create_statuses_for_existing_users(activity_id):
    """Create an ActivityStatus for every active user when the activity is
    tracked server-side; otherwise do nothing."""
    with record('tasks.api.create_statuses_for_existing_users'):
        activity = Activity.objects.get(id=activity_id)
        # Guard clause instead of nesting the loop under the conditional.
        if not activity.is_tracked_serverside:
            return
        for member in User.objects.filter(is_active=True):
            ActivityStatus(user=member, activity=activity).save()
def update_user_metrics():
    """Find stale user metric statuses and dispatch one update job each."""
    with record('tasks.lifestyle.update_user_metrics'):
        find_stale = _find_metric_statuses_to_update.s()
        fan_out = dmap.s(_dispatch_series_value_update.s())
        (find_stale | fan_out).delay()
def get_daily_report():
    """Build the daily admin report dict from the aggregated task stats."""
    with record('tasks.compute.admin.get_daily_report'):
        start, end, task_stats = get_daily_task_stats()
        report = {
            'start_date': start,
            'end_date': end,
            'tasks': task_stats,
        }
        return report
def _get_urls_from_recent_reddits(client_id, client_secret, username,
                                  password):
    """Fetch all of the urls from the desired submissions on the given
    subreddits.

    :returns: a list of url strings pulled from recent submissions.
    """
    with record('tasks.reddit._get_urls_from_recent_reddits'):
        # list(...) copies the iterable directly; the identity
        # comprehension added nothing.
        return list(_fetch_from_reddit(client_id, client_secret, username,
                                       password))
def get_daily_task_stats():
    """Return (start, end, per-group stats) for the last day's tasks."""
    with record('tasks.compute.admin.get_daily_task_stats'):
        start, end, all_tasks = _get_daily_tasks()
        grouped = _sort_tasks_into_groups(all_tasks)
        per_group = {}
        for name, group_tasks in grouped:
            per_group[name] = _get_stats_for_tasks(group_tasks)
        return start, end, per_group
def update_news_feed_from_reddit():
    """Pull recent reddit urls, filter them, and save their OpenGraph data."""
    with record('tasks.reddit.update_news_feed_from_reddit'):
        fetch_urls = _get_urls_from_recent_reddits.si(
            settings.REDDIT_CLIENT_ID,
            settings.REDDIT_CLIENT_SECRET,
            settings.REDDIT_USERNAME,
            settings.REDDIT_PASSWORD,
        )
        pipeline = (fetch_urls
                    | _filter_urls.s()
                    | _save_opengraph_data_for_urls.s())
        pipeline.delay()
def _find_metric_statuses_to_update():
    """Return ids (as strings) of metric statuses stale for over 10 minutes."""
    with record('tasks.lifestyle._find_metric_statuses_to_update'):
        cutoff = timezone.now() - timedelta(minutes=10)
        stale = models.LifestyleMetricStatus.objects.filter(
            last_updated__lte=cutoff)
        return [str(metric_status.id) for metric_status in stale]
def _filter_urls(urls):
    """Drop urls from known-bad hosts or that duplicate existing items.

    Some sites throw up paywalls or other things that make them bad for
    the newsfeed.
    """
    with record('tasks.reddit._filter_urls'):
        kept = []
        for url in urls:
            # Same short-circuit order as the original boolean expression.
            if filters.is_known_bad_host(url):
                continue
            if filters.is_duplicate_of_existing(url):
                continue
            kept.append(url)
        return kept
def update_active_time_status(status_id):
    """Recompute active-time scores for yesterday and today, then refresh
    the status's last_updated timestamp."""
    with record('tasks.metrics.update_active_time_status'):
        metric_status = models.LifestyleMetricStatus.objects.get(id=status_id)
        now = timezone.now()
        for day in (now - timedelta(days=1), now):
            update_scores_for(metric_status.user, day,
                              metric_status.metric.series.first())
        metric_status.last_updated = timezone.now()
        metric_status.save()
def _find_metric_statuses_to_update():
    """Return a list of all of the metric status ids that need updating.

    A status is considered stale once it has gone an hour without being
    refreshed.
    """
    with record('tasks.lifestyle._find_metric_statuses_to_update'):
        # Renamed from 'one_day_old': the cutoff is one *hour*, and the
        # old name misstated the policy.
        one_hour_ago = timezone.now() - timedelta(hours=1)
        expired_statuses = models.LifestyleMetricStatus.objects.filter(
            last_updated__lte=one_hour_ago
        )
        return [str(status.id) for status in expired_statuses]
def _get_total_cad_risk(results, user_id):
    """ Given the user's ancestry, and their individual risk per chromosome
    per chunk, calculate their total overall risk. """
    with record('tasks.cad._get_total_cad_risk', user_id):
        # A hack to filter out the ancestry record. Celery doesn't guarantee order.
        # result[1] appears to be a path-like string in which the ancestry
        # result contains 'ancestry' -- TODO confirm against the grs steps.
        ancestry = [result for result in results if 'ancestry' in result[1]][0]
        risk_of_risks = [result for result in results if 'ancestry' not in result[1]]
        filename, ancestry_path, ancestry_contents = ancestry
        # Prepend the raw ancestry contents so the downstream consumer can
        # unpack (ancestries, path, scores) -- see _store_results.
        return (ancestry_contents,
                *steps.grs_step_4(uuid.uuid4().hex, filename, ancestry_path,
                                  ancestry_contents, risk_of_risks, user_id,
                                  PHENOTYPE))
def send_post_cad_survey_to_users(user_id):
    """Attach the post-CAD-results survey activity status to a user after
    their risk score calculation completes. Inactive users are skipped."""
    with record('tasks.api.send_post_cad_survey_to_users', user_id):
        recipient = User.objects.get(id=user_id)
        if not recipient.is_active:
            return
        survey = Activity.objects.get(
            study_task_identifier=settings.POST_CAD_RESULTS_SURVEY_ID)
        ActivityStatus(user=recipient, activity=survey).save()
def send_registration_email_to_user(registration_url, registration_code,
                                    user_email):
    """Email a registration link and code to a prospective user."""
    with record('tasks.api.send_registration_email_to_user'):
        context = {'url': registration_url, 'code': registration_code}
        body = render_to_string('registration_email.html', context)
        send_mail(settings.REGISTER_EMAIL_SUBJECT, '',
                  settings.EMAIL_HOST_USER, [user_email],
                  fail_silently=False, html_message=body)
def update_user_metrics():
    """Locate every stale user metric status and dispatch a recalculation
    job for each one."""
    with record('tasks.lifestyle.update_user_metrics'):
        workflow = _find_metric_statuses_to_update.s() | dmap.s(
            _dispatch_series_value_update.s())
        workflow.delay()
def send_activity_notification(activity_id):
    """Notify all active users, via push and email, that a new activity is
    available.

    :param activity_id: primary key of the Activity; fetched up front so a
        missing activity fails before any notification goes out.
    """
    with record('tasks.api.send_activity_notification'):
        message = 'A new activity is ready for you!'
        # APNS
        activity = Activity.objects.get(id=activity_id)
        devices = APNSDevice.objects.filter(active=True, user__is_active=True)
        devices.send_message(message)
        # Email -- the template context is empty, so the rendered body is
        # identical for every recipient; render once instead of per user.
        html = render_to_string('new_activity.html', {})
        for user in User.objects.filter(is_active=True):
            send_mail(message, message, settings.EMAIL_HOST_USER,
                      [user.email], fail_silently=False, html_message=html)
def _save_opengraph_data_for_url(url):
    """Fetch a url's OpenGraph metadata and store it as a reddit-sourced
    feed Item."""
    with record('tasks.reddit._save_opengraph_data_for_url'):
        graph = OpenGraph(url=url)
        models.Item(
            title=graph.title,
            link=url,
            description=graph.description,
            image=getattr(graph, 'image', None),
            source=models.Item.SOURCES['reddit'],
        ).save()
def _import_genotype(token, api_user_id, profile_id):
    """Download raw 23andMe genotype data for a profile and persist it.

    :returns: the new Genotype's id as a string, for the follow-up
        conversion step.
    """
    with record('23andMe.tasks._import_genotype', user_id=api_user_id):
        profile = Profile.objects.get(profile_id=profile_id,
                                      user__api_user_id=api_user_id)
        raw = get_genotype_data(token, profile)
        genotype = Genotype.from_json(raw, profile)
        genotype.save()
        return str(genotype.id)
def send_risk_score_notification(user_id, condition_name):
    """Push and email a single user that a new risk score is available."""
    with record('tasks.api.send_risk_score_notification', user_id):
        message = 'Your risk score for {condition} is available.'.format(
            condition=condition_name)
        # APNS
        recipient_devices = APNSDevice.objects.filter(
            active=True, user__is_active=True, user__id=user_id)
        recipient_devices.send_message(message)
        # Email
        recipient = User.objects.get(id=user_id)
        body = render_to_string('new_risk_score.html',
                                {'condition_name': condition_name})
        send_mail(message, message, settings.EMAIL_HOST_USER,
                  [recipient.email], fail_silently=False, html_message=body)
def _save_opengraph_data_for_url(url):
    """Persist the OpenGraph metadata of ``url`` as a reddit feed Item."""
    with record('tasks.reddit._save_opengraph_data_for_url'):
        og = OpenGraph(url=url)
        image = getattr(og, 'image', None)
        item = models.Item(title=og.title, link=url,
                           description=og.description, image=image,
                           source=models.Item.SOURCES['reddit'])
        item.save()
def _import_profile(user_info, token, api_user_id, profileid):
    """Create a local Profile for the matching 23andMe profile entry.

    :returns: the new Profile's id as a string.
    """
    with record('23andMe.tasks._import_profile', user_id=api_user_id):
        matching = [entry for entry in user_info['profiles']
                    if entry['id'] == profileid]
        owner = User.objects.get(api_user_id=api_user_id)
        profile = Profile.from_json(matching[0], owner)
        profile.save()
        return str(profile.id)
def _convert_genotype(genotype_id):
    """Convert a genotype's raw 23andMe data file to VCF and attach the
    converted file to the model."""
    with record('23andMe.tasks._convert_genotype'):
        genotype = Genotype.objects.get(id=genotype_id)
        raw_text = genotype.genotype_file.read().decode('ascii')
        converted = convert(raw_text)
        target_name = '{}_genotype.vcf'.format(genotype.profile.id)
        genotype.converted_file.save(name=target_name,
                                     content=ContentFile(converted))
        genotype.save()
def update_news_feed_from_reddit():
    """Refresh the news feed: fetch recent reddit urls, filter out bad
    ones, then save OpenGraph data for each remaining url."""
    with record('tasks.reddit.update_news_feed_from_reddit'):
        credentials = (settings.REDDIT_CLIENT_ID,
                       settings.REDDIT_CLIENT_SECRET,
                       settings.REDDIT_USERNAME,
                       settings.REDDIT_PASSWORD)
        fetch_urls = _get_urls_from_recent_reddits.si(*credentials)
        save_each = dmap.s(_save_opengraph_data_for_url.s())
        (fetch_urls | _filter_urls.s() | save_each).delay()
def _import_user(token, api_user_id):
    """Fetch 23andMe user info, then save a local User and its APIToken.

    :returns user_info: the 23andMe User/Profile information dict.
    """
    with record('23andMe.tasks._import_user', user_id=api_user_id):
        user_info = get_user_info(token['access_token'])
        user = User.from_json(user_info)
        user.api_user_id = api_user_id
        user.save()
        # Distinct name instead of rebinding the 'token' parameter.
        api_token = APIToken.from_json(token, user)
        api_token.save()
        return user_info
def create_statuses_for_new_user(user_id):
    """Seed a newly-registered active user with default activity statuses
    and one status per lifestyle metric."""
    with record('tasks.api.create_statuses_for_new_user', user_id):
        user = User.objects.get(id=user_id)
        if not user.is_active:
            return
        # Resolve all activities first so a missing study id fails before
        # any status has been saved (matching the original ordering).
        activities = [Activity.objects.get(study_task_identifier=study_id)
                      for study_id in settings.DEFAULT_STUDY_IDS]
        for activity in activities:
            if activity.is_tracked_serverside:
                ActivityStatus(user=user, activity=activity).save()
        for metric in LifestyleMetric.objects.all():
            LifestyleMetricStatus(user=user, metric=metric).save()
def _save_opengraph_data_for_urls(urls):
    """ Given a set of urls, try to save each one's open graph data if it
    has it. If the Open Graph data doesn't exist then move on. Stop after
    the first one has been successfully parsed and saved. """
    # NOTE(review): the record label is the singular
    # 'tasks.reddit._save_opengraph_data_for_url' even though this is the
    # batch variant -- confirm whether that metric name is intentional.
    with record('tasks.reddit._save_opengraph_data_for_url'):
        for url in urls:
            try:
                og = OpenGraph(url=url)
                item = models.Item(title=og.title, link=url,
                                   description=og.description,
                                   image=getattr(og, 'image', None),
                                   source=models.Item.SOURCES['reddit'])
                item.save()
                # Only the first url that parses and saves cleanly is kept.
                break
            except Exception:
                # Deliberate best-effort: a url with missing OpenGraph data
                # or any fetch/parse/save failure is skipped silently so the
                # next candidate can be tried.
                pass
def _store_results(results, user_id):
    """Persist a user's CAD risk scores and per-population ancestry values.

    :param results: a 3-tuple ``(ancestries, path, scores)`` where
        ``scores`` is a newline-separated string and ``ancestries`` a
        whitespace-separated string, both ordered per SCORE_RESULTS_ORDER.
    :param user_id: primary key of the local User row.
    """
    with record('tasks.cad._store_results', user_id):
        ancestries, path, scores = results
        user = models.User.objects.get(id=user_id)
        cad = models.Condition.objects.filter(
            name__iexact='coronary artery disease')[0]
        for population_name, score in zip(SCORE_RESULTS_ORDER,
                                          scores.split('\n')):
            # Comparison yields the bool directly; the old
            # 'True if ... else False' ternary was redundant.
            featured = population_name == 'custom'
            population = models.Population.objects.filter(
                name__iexact=population_name)[0]
            risk_score = models.RiskScore(user=user, condition=cad,
                                          featured=featured,
                                          population=population,
                                          calculated=True,
                                          value=float(score))
            risk_score.save()
        for population_name, per_ancestry in zip(SCORE_RESULTS_ORDER,
                                                 ancestries.split()):
            population = models.Population.objects.filter(
                name__iexact=population_name)[0]
            ancestry = models.Ancestry(user=user, population=population,
                                       value=float(per_ancestry))
            ancestry.save()
def get_cad_risk_score(user_id):
    """ Given an API user id, perform the grs risk score calculations. This
    is the high level pipeline invocation method used to submit all
    subsequent and dependent steps. """
    with record('tasks.cad.get_cad_risk_score', user_id):
        # Step 1 (ancestry) runs in parallel with the per-chunk chains below
        # inside the chord header.
        step_1 = get_ancestry.s(user_id)
        # One (haplotype | impute+risk) chain per chromosome chunk.
        steps_2_and_3 = [
            _get_cad_haplotypes.s(user_id, chunk[0]) |
            _impute_and_get_cad_risk_per_chunk.s(user_id, chunk)
            for chunk in steps.get_chunks()
        ]
        step_4 = _get_total_cad_risk.s(user_id)
        # After the chord completes: store results, then notify and queue the
        # post-results survey (.si() signatures ignore upstream results).
        workflow = chord(header=group(
            [step_1, *steps_2_and_3]), body=step_4) | (
            _store_results.s(user_id) |
            _send_cad_notification.si(user_id) |
            send_post_cad_survey_to_users.si(user_id))
        workflow.delay()
def send_followup_survey_to_users():
    """Queue the 6-month follow-up survey for eligible users.

    A user is eligible when they have had a risk score for at least 24
    weeks (~6 months) and do not already have a status for the follow-up
    survey activity.
    """
    with record('tasks.api.send_followup_survey_to_users'):
        activity = Activity.objects.get(
            study_task_identifier=settings.POST_CAD_6MO_SURVEY_ID)
        six_months_delta = timedelta(weeks=24)
        date_limit = (timezone.now() - six_months_delta).strftime("%Y-%m-%d")
        for user in User.objects.filter(is_active=True):
            # .exists() issues a cheap EXISTS query; len(qs) > 0 fetched
            # every matching row just to test emptiness.
            risk_score_is_old_enough = RiskScore.objects.filter(
                created_on__lte=date_limit, user=user).exists()
            user_already_has_status = ActivityStatus.objects.filter(
                activity=activity, user=user).exists()
            if risk_score_is_old_enough and not user_already_has_status:
                # 'follup_status' typo fixed.
                followup_status = ActivityStatus(user=user, activity=activity)
                followup_status.save()
def get_cad_risk_score(user_id):
    """ Given an API user id, perform the grs risk score calculations. This
    is the high level pipeline invocation method used to submit all
    subsequent and dependent steps. """
    with record('tasks.cad.get_cad_risk_score', user_id):
        step_1 = get_ancestry.s(user_id)
        # One (haplotype | impute+risk) chain per chromosome chunk; these run
        # in parallel with step_1 inside the chord header.
        steps_2_and_3 = [
            _get_cad_haplotypes.s(user_id, chunk[0]) |
            _impute_and_get_cad_risk_per_chunk.s(user_id, chunk)
            for chunk in steps.get_chunks()
        ]
        step_4 = _get_total_cad_risk.s(user_id)
        # .si() makes these signatures immutable: the notification tasks take
        # only the user id and ignore the stored-results return value.
        notify_user = (
            _send_cad_notification.si(user_id) |
            send_post_cad_survey_to_users.si(user_id)
        )
        workflow = chord(
            header=group([step_1, *steps_2_and_3]),
            body=step_4
        ) | _store_results.s(user_id) | notify_user
        workflow.delay()
def _send_cad_notification(user_id):
    """Send the risk-score-ready notification for the coronary artery
    disease condition via the shared API helper."""
    with record('tasks.cad._send_cad_notification', user_id):
        condition = models.Condition.objects.filter(
            name__iexact='coronary artery disease')[0]
        send_risk_score_notification(user_id, condition.name)
def _impute_and_get_cad_risk_per_chunk(haps, user_id, chunk):
    """ Given a user, the chunk of a chromosome and the known haplotypes for
    that chromosome, calculate their risk for that given chunk. """
    with record('tasks.cad._impute_and_get_cad_risk_per_chunk', user_id):
        # haps and chunk are splatted positionally around PHENOTYPE, so
        # grs_step_3's parameter order must match (run_id, *haps, phenotype,
        # *chunk). haps is presumably the tuple returned by grs_step_2 --
        # TODO confirm.
        return steps.grs_step_3(uuid.uuid4().hex, *haps, PHENOTYPE, *chunk)
def send_daily_report_to_admins():
    """Generate the daily task report and email it to the admins."""
    with record('tasks.compute.admin.send_daily_report_to_admins'):
        _send_email_to_admins(get_daily_report())
def _get_urls_from_recent_reddits(client_id, client_secret, username,
                                  password, n=1):
    """Fetch up to ``n`` urls from recent submissions on the configured
    subreddits.

    :param n: maximum number of urls to return (default 1).
    :returns: a list of at most ``n`` url strings.
    """
    from itertools import islice
    with record('tasks.reddit._get_urls_from_recent_reddits'):
        # islice stops after n items instead of materializing every url and
        # then slicing (the old identity comprehension + [0:n] did both).
        return list(islice(_fetch_from_reddit(client_id, client_secret,
                                              username, password), n))