def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """
    # We assume that the cluster is configured in only one region.
    pipeline_region = get_current_region()

    # Get the object ID of the study, used in the pipeline
    query = Study.objects.filter(pk=study_id)
    if not query.exists():
        return abort(404)
    object_id = query.get().object_id

    error_sentry = make_error_sentry("data", tags={"pipeline_frequency": "manually"})

    # Get new data access credentials for the manual user, submit a manual job, display a message.
    # Report all errors to Sentry, including DataPipelineNotConfigured errors.
    with error_sentry:
        ssm_client = get_boto_client('ssm', pipeline_region)
        refresh_data_access_credentials('manually', ssm_client=ssm_client, webserver=True)
        batch_client = get_boto_client('batch', pipeline_region)
        create_one_job('manually', object_id, batch_client, webserver=True)
        flash('Data pipeline code successfully initiated!', 'success')

    if error_sentry.errors:
        flash('An unknown error occurred when trying to run this task.', category='danger')
        print(error_sentry)

    return redirect('/data-pipeline/{:s}'.format(study_id))
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """
    # Get the object ID of the study, used in the pipeline
    object_id = Study.objects.get(pk=study_id).object_id

    error_sentry = make_error_sentry("data", tags={"pipeline_frequency": "manually"})

    with error_sentry:
        # Get new data access credentials for the manual user
        aws_object_names = get_aws_object_names()
        refresh_data_access_credentials('manually', aws_object_names)

        # Submit a manual job
        create_one_job('manually', object_id)

        # The success message gets displayed to the user upon redirect
        flash('Data pipeline code successfully initiated!', 'success')

    if error_sentry.errors:
        flash('An unknown error occurred when trying to run this task.', 'danger')

    return redirect('/data-pipeline/{:s}'.format(study_id))
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: ObjectId of a Study
    """
    # Get new data access credentials for the manual user
    aws_object_names = get_aws_object_names()
    refresh_data_access_credentials('manually', aws_object_names)

    # Submit a manual job
    create_one_job('manually', study_id)

    # The success message gets displayed to the user upon redirect
    flash('Data pipeline code successfully initiated!', 'success')

    return redirect('/data-pipeline/{:s}'.format(study_id))
def run_manual_code(study_id):
    """
    Create an AWS Batch job for the Study specified
    :param study_id: Primary key of a Study
    """
    username = session["admin_username"]

    # Parse and validate the destination email addresses, if any were supplied.
    # The list is initialized up front so the completion messages below never hit a
    # NameError when no addresses are given.
    destination_email_addresses = []
    destination_email_addresses_string = ''
    if 'destination_email_addresses' in request.values:
        destination_email_addresses_string = request.values['destination_email_addresses']
        destination_email_addresses = [
            d.strip() for d in filter(None, re.split(r"[, \?:;]+", destination_email_addresses_string))
        ]
        for email_address in destination_email_addresses:
            if not validate_email(email_address):
                flash('Email address {0} in ({1}) does not appear to be a valid email address.'
                      .format(email_address, destination_email_addresses_string),
                      category='danger')
                return redirect('/data-pipeline/{:s}'.format(study_id))
        destination_email_addresses_string = ','.join(destination_email_addresses)

    # Optional list of participants to restrict the pipeline run to.
    participants_string = ''
    if 'participants' in request.values:
        participants_string = request.form.getlist('participants')
        participants_string = ','.join(participants_string)

    # Optional time window bounding the data the pipeline will process.
    data_start_time = ''
    if 'time_start' in request.values:
        data_start_time = request.values['time_start']

    data_end_time = ''
    if 'time_end' in request.values:
        data_end_time = request.values['time_end']

    # Get the object ID of the study, used in the pipeline
    query = Study.objects.filter(pk=study_id)
    if not query.exists():
        flash('Could not find study corresponding to study id {0}'.format(study_id),
              category='danger')
        return redirect('/data-pipeline/{:s}'.format(study_id))
    object_id = query.get().object_id

    pipeline_region = os.getenv("pipeline_region", None)
    if not pipeline_region:
        pipeline_region = 'us-east-1'
        flash('Pipeline region not configured, choosing default ({})'.format(pipeline_region),
              category='info')

    error_sentry = make_error_sentry("data", tags={"pipeline_frequency": "manually"})

    # Get new data access credentials for the manual user, submit a manual job, display a message.
    # Report all errors to Sentry, including DataPipelineNotConfigured errors.
    with error_sentry:
        ssm_client = get_boto_client('ssm', pipeline_region)
        refresh_data_access_credentials('manually', ssm_client=ssm_client)
        batch_client = get_boto_client('batch', pipeline_region)
        create_one_job('manually', object_id, username, destination_email_addresses_string,
                       data_start_time, data_end_time, participants_string, batch_client)

        if data_start_time and data_end_time:
            flash('Data pipeline successfully initiated on data collected between {0} and {1}! '
                  'Email(s) will be sent to {2} on completion.'
                  .format(data_start_time, data_end_time, destination_email_addresses), 'success')
        elif data_start_time:
            flash('Data pipeline successfully initiated on data collected after {0}! '
                  'Email(s) will be sent to {1} on completion.'
                  .format(data_start_time, destination_email_addresses), 'success')
        elif data_end_time:
            flash('Data pipeline successfully initiated on data collected before {0}! '
                  'Email(s) will be sent to {1} on completion.'
                  .format(data_end_time, destination_email_addresses), 'success')
        else:
            flash('Data pipeline successfully initiated! Email(s) will be sent to {0} on completion.'
                  .format(destination_email_addresses), 'success')

    if error_sentry.errors:
        flash('An error occurred when trying to execute the pipeline: {0}'.format(error_sentry),
              category='danger')
        print(error_sentry)

    return redirect('/data-pipeline/{:s}'.format(study_id))
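# The run_manual_code() variants above rely on Flask request-context helpers (session, request,
# flash, redirect, abort) plus the Django ORM Study model, so they only work when invoked as a
# registered Flask view. A minimal sketch of the assumed wiring follows; the blueprint name,
# URL rule, and HTTP methods are illustrative assumptions, not the project's actual routing.
from flask import Blueprint

data_pipeline_api = Blueprint('data_pipeline_api', __name__)  # hypothetical blueprint

# Register the view so a POST from the study's data-pipeline page reaches run_manual_code().
data_pipeline_api.add_url_rule(
    '/run-manual-code/<string:study_id>', view_func=run_manual_code, methods=['POST']
)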
from datetime import timedelta

from django.utils import timezone

from database.data_access_models import ChunkRegistry
from database.study_models import Study
from libs.sentry import make_error_sentry
from pipeline.boto_helpers import get_boto_client
from pipeline.configuration_getters import get_current_region
from pipeline.index import create_one_job, refresh_data_access_credentials

pipeline_region = get_current_region()
ssm_client = get_boto_client('ssm', pipeline_region)
error_sentry = make_error_sentry("data", tags={"pipeline_frequency": "manually"})
batch_client = get_boto_client('batch', pipeline_region)
yesterday = timezone.now() - timedelta(days=1)

refresh_data_access_credentials('manually', ssm_client=ssm_client, webserver=False)

################################################################################################
# If you are running this on an Ubuntu machine you have to sudo apt-get -y install cloud-utils #
################################################################################################

for study in Study.objects.all():
    with error_sentry:
        # We only want to run the pipeline for data that has been uploaded in the last day.
        # Report all errors to Sentry.
        for patient_id in ChunkRegistry.get_updated_users_for_study(study, yesterday):
            # The original snippet is truncated after batch_client; webserver=False is an
            # assumption, mirroring the refresh_data_access_credentials() call above.
            create_one_job('manually', study, patient_id, batch_client, webserver=False)
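# The daily script above touches the Django ORM at module level, so it has to run with Django
# already configured (for example via a management command or `manage.py shell`). A minimal
# sketch of a standalone entry point that cron could call directly; the settings module path
# and function name are assumptions for illustration.
import os


def submit_daily_pipeline_jobs():
    # Configure Django before any ORM access.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')  # assumed settings path
    import django
    django.setup()
    # ...then run the credential refresh and per-study job-submission loop shown above...


if __name__ == '__main__':
    submit_daily_pipeline_jobs()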