def monitor(instance_id):
    """Show the status page for a debug instance owned by the current user."""
    # Only authorized users may view instance status.
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    missing = "No such instance: {}".format(instance_id), 404
    try:
        # Look the instance up; an unknown id yields an empty result set.
        found = ec2.get_all_reservations(instance_ids=[instance_id])
        inst = found[0].instances[0]
    except IndexError:
        return missing
    # Report "missing" rather than "forbidden" when the viewer isn't the owner.
    if current_user.email != inst.tags['Owner']:
        return missing
    # When the instance is scheduled to be torn down.
    when_killed = get_termination_time(inst.launch_time)
    return render_template(
        'monitor.html',
        instance_id=instance_id,
        instance_state=inst.state,
        public_dns=inst.public_dns_name,
        terminate_url=abs_url_for('kill', instance_id=inst.id),
        terminate_time=when_killed.strftime("%Y-%m-%d %H:%M:%S"),
        terminate_hours_left=get_hours_remaining(when_killed))
def kill(instance_id):
    """Terminate the given debug instance, provided the user owns it."""
    # Only authorized users may do this.
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    gone = "No such instance: {}".format(instance_id), 404
    try:
        # Resolve the id to a live instance object.
        matches = ec2.get_all_reservations(instance_ids=[instance_id])
        inst = matches[0].instances[0]
    except IndexError:
        return gone
    # Non-owners get the same answer as a missing instance.
    if inst.tags['Owner'] != current_user.email:
        return gone
    # Tear it down, then refresh to pick up the new state for display.
    inst.terminate()
    inst.update()
    return render_template('kill.html',
                           instance_id=instance_id,
                           instance_state=inst.state,
                           public_dns=inst.public_dns_name,
                           monitoring_url=abs_url_for('monitor',
                                                      instance_id=inst.id))
def cluster_monitor(jobflow_id):
    """Show the status page for an EMR cluster owned by the current user."""
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        jobflow = emr.describe_jobflow(jobflow_id)
        cluster = emr.describe_cluster(jobflow_id)
    # FIX: was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; trap only real lookup errors.
    except Exception:
        return "No such cluster: {}".format(jobflow_id), 404
    # Non-owners see the cluster as missing.
    if get_tag_value(cluster.tags, "Owner") != current_user.email:
        return "No such cluster: {}".format(jobflow_id), 404
    # Get a datetime representing when the cluster will be terminated,
    # defaulting the creation stamp to "now" when the jobflow lacks one.
    # FIX: the original line ended in a stray ':' (SyntaxError).
    terminate_time = get_termination_time(
        getattr(jobflow, "creationdatetime", datetime.utcnow()))
    # FIX: hours left was computed as (utcnow - terminate_time), i.e. hours
    # *elapsed past* the deadline; the remaining time is the reverse.
    hours_left = max(
        0, int((terminate_time - datetime.utcnow()).total_seconds()) // 3600)
    # Alright then, let's report status
    return render_template(
        'cluster/monitor.html',
        jobflow_id=jobflow_id,
        instance_state=jobflow.state,
        public_dns=getattr(jobflow, "masterpublicdnsname", None),
        terminate_url=abs_url_for('cluster_kill', jobflow_id=jobflow_id),
        terminate_time=terminate_time.strftime("%Y-%m-%d %H:%M:%S"),
        terminate_hours_left=hours_left)
def cluster_kill(jobflow_id):
    """Terminate an EMR cluster (jobflow) owned by the current user."""
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        jobflow = emr.describe_jobflow(jobflow_id)
        cluster = emr.describe_cluster(jobflow_id)
    # FIX: was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; trap only real lookup errors.
    except Exception:
        return "No such cluster: {}".format(jobflow_id), 404
    # Non-owners see the cluster as missing.
    if get_tag_value(cluster.tags, "Owner") != current_user.email:
        return "No such cluster: {}".format(jobflow_id), 404
    # Terminate cluster
    emr.terminate_jobflow(jobflow_id)
    # Alright then, let's report the state as fetched before termination.
    return render_template(
        'cluster/kill.html',
        jobflow_id=jobflow_id,
        jobflow_state=jobflow.state,
        public_dns=getattr(jobflow, "masterpublicdnsname", None),
    )
def cluster_monitor(jobflow_id):
    """Render the monitoring page for the given EMR cluster."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        cluster = emr.describe_cluster(jobflow_id)
    except Exception as e:
        return "No such cluster: {} -- error: {}".format(jobflow_id, e), 404
    # Owners only; everyone else sees the cluster as missing.
    if current_user.email != get_tag_value(cluster.tags, "Owner"):
        return "No such cluster: {}".format(jobflow_id), 404
    # Planned termination time, defaulting the creation stamp to "now".
    started = getattr(cluster.status.timeline, "creationdatetime",
                      datetime.utcnow())
    ends = get_termination_time(started)
    dns = None
    if hasattr(cluster, "masterpublicdnsname"):
        dns = cluster.masterpublicdnsname
    return render_template(
        'cluster/monitor.html',
        jobflow_id=jobflow_id,
        instance_state=cluster.status.state,
        public_dns=dns,
        terminate_url=abs_url_for('cluster_kill', jobflow_id=jobflow_id),
        terminate_time=ends.strftime("%Y-%m-%d %H:%M:%S"),
        terminate_hours_left=get_hours_remaining(ends))
def edit_scheduled_job(job_id):
    """Display (GET) or begin validating (POST) an existing scheduled job.

    NOTE(review): this version appears truncated -- after validating
    'schedule-time-of-day' it falls off the end and implicitly returns
    None; confirm against the complete handler.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    if request.method != 'GET' and request.method != 'POST':
        return "Unsupported method: {0}".format(request.method), 405
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    elif job['owner'] != current_user.email:
        return "Can't edit job {0}".format(job_id), 401
    if request.method == 'GET':
        # Show job details.
        return render_template("schedule.html", values=job_to_form(job))
    # else: request.method == 'POST' -- update this job's details
    if request.form['job-id'] != job_id:
        return "Mismatched job id", 400
    errors = {}
    # Every listed form field must be present and non-blank.
    for f in ['job-name', 'commandline', 'output-dir', 'schedule-frequency',
              'schedule-time-of-day', 'timeout']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    time_of_day = -1
    try:
        time_of_day = get_required_int(request, 'schedule-time-of-day',
                                       "Time of Day", max_value=23)
    # FIX: 'except ValueError, e' is Python-2-only syntax; 'as' works on
    # Python 2.6+ and Python 3 alike.
    except ValueError as e:
        errors['schedule-time-of-day'] = e.message
def cluster_monitor(jobflow_id):
    """Report state, DNS and scheduled shutdown for the user's cluster."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        cluster = emr.describe_cluster(jobflow_id)
    except Exception as e:
        return "No such cluster: {} -- error: {}".format(jobflow_id, e), 404
    if get_tag_value(cluster.tags, "Owner") != current_user.email:
        return "No such cluster: {}".format(jobflow_id), 404
    # Compute the planned termination time from the creation timestamp,
    # falling back to "now" when the timeline lacks one.
    created = getattr(cluster.status.timeline, "creationdatetime",
                      datetime.utcnow())
    deadline = get_termination_time(created)
    master_dns = (cluster.masterpublicdnsname
                  if hasattr(cluster, "masterpublicdnsname") else None)
    context = {
        'jobflow_id': jobflow_id,
        'instance_state': cluster.status.state,
        'public_dns': master_dns,
        'terminate_url': abs_url_for('cluster_kill', jobflow_id=jobflow_id),
        'terminate_time': deadline.strftime("%Y-%m-%d %H:%M:%S"),
        'terminate_hours_left': get_hours_remaining(deadline),
    }
    return render_template('cluster/monitor.html', **context)
def kill(instance_id):
    """Terminate a debug instance owned by the current user."""
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        # Fetch the actual instance
        reservations = ec2.get_all_reservations(instance_ids=[instance_id])
        instance = reservations[0].instances[0]
    except IndexError:
        # FIX: sibling handlers return a 404 status with the offending id;
        # this one returned a bare 200 "No such instance" body.
        return "No such instance: {}".format(instance_id), 404
    # Check that it is the owner who is logged in
    if instance.tags["Owner"] != current_user.email:
        return login_manager.unauthorized()
    # Terminate and update instance
    instance.terminate()
    instance.update()
    # Alright then, let's report status
    return render_template(
        "kill.html",
        instance_state=instance.state,
        public_dns=instance.public_dns_name,
        monitoring_url=abs_url_for("monitor", instance_id=instance.id),
    )
def monitor(instance_id):
    """Render the monitoring page for one of the user's debug instances."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        # An unknown id gives an empty reservation list -> IndexError.
        hits = ec2.get_all_reservations(instance_ids=[instance_id])
        box = hits[0].instances[0]
    except IndexError:
        return "No such instance: {}".format(instance_id), 404
    # Owners only; everyone else sees it as missing.
    if box.tags['Owner'] != current_user.email:
        return "No such instance: {}".format(instance_id), 404
    # Scheduled shutdown, derived from the launch time.
    shutdown_at = get_termination_time(box.launch_time)
    context = {
        'instance_id': instance_id,
        'instance_state': box.state,
        'public_dns': box.public_dns_name,
        'terminate_url': abs_url_for('kill', instance_id=box.id),
        'terminate_time': shutdown_at.strftime("%Y-%m-%d %H:%M:%S"),
        'terminate_hours_left': get_hours_remaining(shutdown_at),
    }
    return render_template('monitor.html', **context)
def get_worker_params(errors=None, values=None):
    """Render the worker-launch form, carrying over prior errors/values."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # A fresh token per render; used downstream as the EC2 client token.
    context = {'errors': errors, 'values': values, 'token': str(uuid4())}
    return render_template('worker.html', **context)
def edit_scheduled_job(job_id):
    """Display (GET) or update (POST) an existing scheduled job.

    The job must exist and be owned by the logged-in user. On a valid
    POST, optionally re-uploads the code tarball, persists the changes,
    and regenerates the runner configs and crontab.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    if request.method != 'GET' and request.method != 'POST':
        return "Unsupported method: {0}".format(request.method), 405
    old_job = get_job(job_id=job_id)
    if old_job is None:
        return "No such job {0}".format(job_id), 404
    elif old_job['owner'] != current_user.email:
        return "Can't edit job {0}".format(job_id), 401
    if request.method == 'GET':
        # Show job details.
        return render_template("schedule.html", values=job_to_form(old_job))
    # else: request.method == 'POST'
    # update this job's details
    if request.form['job-id'] != job_id:
        return "Mismatched job id", 400
    new_job, job_meta, errors = validate_job_form(request, is_update=True,
                                                  old_job=old_job)
    # If there were any errors, stop and re-display the form.
    if errors:
        return render_template("schedule.html", values=request.form,
                               errors=errors)
    # Upload code if need be:
    if request.files['code-tarball']:
        err = upload_code(request.form["job-name"],
                          request.files["code-tarball"])
        if err is not None:
            errors["code-tarball"] = err
            return render_template("schedule.html", values=request.form,
                                   errors=errors)
    result = update_job(new_job)
    if result.rowcount > 0:
        print "Updated job id", job_id
    # Rebuild configs/crontab from the full job list (all owners), since
    # this job's schedule may have changed.
    jobs = []
    for j in get_jobs():
        jobs.append(j)
    update_configs(jobs)
    update_crontab(jobs)
    return render_template('schedule_create.html',
                           result=result,
                           code_s3path=job_meta["code_s3path"],
                           data_s3path=job_meta["data_s3path"],
                           commandline=new_job["commandline"],
                           output_dir=new_job["output_dir"],
                           job_frequency=job_meta["frequency"],
                           job_time=hour_to_time(new_job["schedule_hour"]),
                           job_dow=job_meta["job_dow"],
                           job_dom=job_meta["job_dom"],
                           job_timeout=new_job["timeout_minutes"],
                           cron_spec=job_meta["cron_spec"])
def schedule_job(errors=None, values=None):
    """Show the scheduling form along with the current user's jobs."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # Materialize the iterable of this user's jobs for the template.
    jobs = list(get_jobs(current_user.email))
    return render_template('schedule.html', jobs=jobs, errors=errors,
                           values=values)
def _schedule_job(is_cluster, errors=None, values=None):
    """Render the scheduling form (cluster or plain) with the user's jobs."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # Pick the flavor-specific template.
    if is_cluster:
        template = "cluster/schedule.html"
    else:
        template = "schedule.html"
    jobs = list(get_jobs(current_user.email, is_cluster=is_cluster))
    return render_template(template, jobs=jobs, errors=errors, values=values)
def view_job_data(job_id):
    """Show the data files produced by a job, if it belongs to the user."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    if job['owner'] != current_user.email:
        return "Can't view data for job {0}".format(job_id), 401
    # OK, this job is yours -- dig up its data files.
    files = get_job_files(job, "data")
    return render_template('schedule_files.html', name="data", files=files,
                           job=job)
def _create_scheduled_job(is_cluster):
    """Create a new scheduled job (cluster or plain) from the posted form.

    Validates the form, uploads the job's code, inserts the job row, and
    regenerates configs and the crontab for all jobs.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # Pick the re-display handler and success template per job flavor.
    if is_cluster:
        schedule = cluster_schedule_job
        template = 'cluster/schedule_create.html'
    else:
        template = 'schedule_create.html'
        schedule = schedule_job
    job, job_meta, errors = _validate_job_form(is_cluster, request,
                                               is_update=False)
    # If there were any errors, stop and re-display the form.
    # It's only polite to render the form with the previously-supplied
    # values filled in. Unfortunately doing so for files doesn't seem to be
    # worth the effort.
    if errors:
        return schedule(errors, request.form)
    err = upload_code(request.form["job-name"], request.files["code"])
    if err is not None:
        errors["code"] = err
        return schedule(errors, request.form)
    result = insert_job(job)
    if result.inserted_primary_key > 0:
        print "Inserted job id", result.inserted_primary_key
    # Rebuild configs/crontab from the full job list.
    jobs = []
    for j in get_jobs():
        jobs.append(j)
    update_configs(jobs)
    update_crontab(jobs)
    return render_template(template,
                           result=result,
                           code_s3path=job_meta["code_s3path"],
                           data_s3path=job_meta["data_s3path"],
                           commandline=job["commandline"],
                           output_dir=job["output_dir"],
                           job_frequency=job_meta["frequency"],
                           job_time=hour_to_time(job["schedule_hour"]),
                           job_dow=job_meta["job_dow"],
                           job_dom=job_meta["job_dom"],
                           job_timeout=job["timeout_minutes"],
                           cron_spec=job_meta["cron_spec"])
def _view_job_data(is_cluster, job_id):
    """List a job's "data" files for its owner (cluster-aware template)."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    if is_cluster:
        template = "cluster/schedule_files.html"
    else:
        template = "schedule_files.html"
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    if job['owner'] != current_user.email:
        return "Can't view data for job {0}".format(job_id), 401
    # This job is yours -- dig up its data files.
    files = get_job_files(job, "data")
    return render_template(template, name="data", files=files, job=job)
def _create_scheduled_job(is_cluster):
    """Insert a new scheduled job (cluster or plain) from the posted form.

    Validates the form, uploads the job's code archive, inserts the job
    row, and regenerates configs and the crontab for all jobs.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # Pick the re-display handler and success template per job flavor.
    if is_cluster:
        schedule = cluster_schedule_job
        template = 'cluster/schedule_create.html'
    else:
        template = 'schedule_create.html'
        schedule = schedule_job
    job, job_meta, errors = _validate_job_form(is_cluster, request,
                                               is_update=False)
    # If there were any errors, stop and re-display the form.
    # It's only polite to render the form with the previously-supplied
    # values filled in. Unfortunately doing so for files doesn't seem to be
    # worth the effort.
    if errors:
        return schedule(errors, request.form)
    err = upload_code(request.form["job-name"], request.files["code"])
    if err is not None:
        errors["code"] = err
        return schedule(errors, request.form)
    result = insert_job(job)
    if result.inserted_primary_key > 0:
        print "Inserted job id", result.inserted_primary_key
    # Rebuild configs/crontab from the full job list.
    jobs = []
    for j in get_jobs():
        jobs.append(j)
    update_configs(jobs)
    update_crontab(jobs)
    return render_template(template,
                           result=result,
                           code_s3path=job_meta["code_s3path"],
                           data_s3path=job_meta["data_s3path"],
                           commandline=job["commandline"],
                           output_dir=job["output_dir"],
                           job_frequency=job_meta["frequency"],
                           job_time=hour_to_time(job["schedule_hour"]),
                           job_dow=job_meta["job_dow"],
                           job_dom=job_meta["job_dom"],
                           job_timeout=job["timeout_minutes"],
                           cron_spec=job_meta["cron_spec"])
def view_job_logs(job_id):
    """List a job's log files, restricted to the job's owner."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    if job['owner'] != current_user.email:
        return "Can't view logs for job {0}".format(job_id), 401
    # This job is yours -- dig up the logs.
    # TODO: Add a "<delete>" link
    # Add a "<delete all logs>" link
    logs = get_job_logs(job)
    return render_template('schedule_files.html', name="log", files=logs,
                           job=job)
def delete_scheduled_job(job_id):
    """Delete one of the current user's scheduled jobs."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    if job['owner'] != current_user.email:
        return "Can't delete job {0}".format(job_id), 401
    # OK, this job is yours. let's delete it.
    result = delete_job(job_id, current_user.email)
    if result.rowcount == 1:
        # We don't have to update the configs, though maybe we should
        # delete this job's config to clean up.
        update_crontab()
    return render_template('schedule_delete.html', result=result, job=job)
def spawn_debug_instance():
    """Launch a single EC2 debug instance for the logged-in user.

    Uploads the user's public SSH key to S3, boots an instance with a
    templated boot script, tags it, and emails the monitoring URL.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # Upload s3 key to bucket; the form token namespaces the key file.
    sshkey = bucket.new_key("keys/%s.pub" % request.form["token"])
    sshkey.set_contents_from_file(request.files["public-ssh-key"])
    # Create the user-data boot script from a template.
    boot_script = render_template(
        "boot-script.sh",
        aws_region=app.config["AWS_REGION"],
        temporary_bucket=app.config["TEMPORARY_BUCKET"],
        ssh_key=sshkey.key,
    )
    # Create EC2 instance.
    # NOTE(review): the AMI id is hard-coded -- presumably the standard
    # debug image; confirm and consider moving it into app.config.
    reservation = ec2.run_instances(
        image_id="ami-ace67f9c",
        security_groups=app.config["SECURITY_GROUPS"],
        user_data=boot_script,
        instance_type=app.config["INSTANCE_TYPE"],
        # Shutting down from inside the instance terminates it.
        instance_initiated_shutdown_behavior="terminate",
        # The client token makes the launch request idempotent.
        client_token=request.form["token"],
        instance_profile_name=app.config["INSTANCE_PROFILE"],
    )
    instance = reservation.instances[0]
    # Associate a few tags; "Owner" is what the monitor/kill views check.
    ec2.create_tags(
        [instance.id],
        {"Owner": current_user.email,
         "Name": request.form["name"],
         "Application": app.config["INSTANCE_APP_TAG"]},
    )
    # Send an email to the user who launched it
    params = {"monitoring_url": abs_url_for("monitor", instance_id=instance.id)}
    ses.send_email(
        source=app.config["EMAIL_SOURCE"],
        subject=("telemetry-analysis debug instance: %s (%s) launched"
                 % (request.form["name"], instance.id)),
        format="html",
        body=render_template("instance-launched-email.html", **params),
        to_addresses=[current_user.email],
    )
    return redirect(url_for("monitor", instance_id=instance.id))
def create_scheduled_job():
    """Validate the required fields of a new scheduled-job submission.

    NOTE(review): this version appears truncated -- it ends after
    validating 'schedule-time-of-day' and implicitly returns None;
    confirm against the complete handler.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Every listed form field must be present and non-blank.
    for f in ['job-name', 'commandline', 'output-dir', 'schedule-frequency',
              'schedule-time-of-day', 'timeout']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    time_of_day = -1
    try:
        time_of_day = get_required_int(request, 'schedule-time-of-day',
                                       "Time of Day", max_value=23)
    # FIX: 'except ValueError, e' is Python-2-only syntax; 'as' works on
    # Python 2.6+ and Python 3 alike.
    except ValueError as e:
        errors['schedule-time-of-day'] = e.message
def _view_job_logs(is_cluster, job_id):
    """List a job's log files for its owner (cluster-aware template)."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    template = ("cluster/schedule_files.html"
                if is_cluster else "schedule_files.html")
    job = get_job(job_id=job_id)
    if job is None:
        return "No such job {0}".format(job_id), 404
    if job['owner'] != current_user.email:
        return "Can't view logs for job {0}".format(job_id), 401
    # The job belongs to the caller -- fetch its logs.
    # TODO: Add a "<delete>" link
    # Add a "<delete all logs>" link
    logs = get_job_logs(job)
    return render_template(template, name="log", files=logs, job=job)
def cluster_monitor(jobflow_id):
    """Render the monitoring page for an EMR cluster (no termination info)."""
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        jobflow = emr.describe_jobflow(jobflow_id)
        cluster = emr.describe_cluster(jobflow_id)
    # FIX: was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; trap only real lookup errors.
    except Exception:
        return "No such cluster: {}".format(jobflow_id), 404
    # Non-owners see the cluster as missing.
    if get_tag_value(cluster.tags, "Owner") != current_user.email:
        return "No such cluster: {}".format(jobflow_id), 404
    # Alright then, let's report status
    return render_template(
        'cluster/monitor.html',
        jobflow_id=jobflow_id,
        instance_state=jobflow.state,
        public_dns=getattr(jobflow, "masterpublicdnsname", None),
        terminate_url=abs_url_for('cluster_kill', jobflow_id=jobflow_id))
def cluster_kill(jobflow_id):
    """Terminate an EMR cluster owned by the current user."""
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    try:
        cluster = emr.describe_cluster(jobflow_id)
    # FIX: was a bare 'except:', which also swallows SystemExit and
    # KeyboardInterrupt; trap only real lookup errors.
    except Exception:
        return "No such cluster: {}".format(jobflow_id), 404
    # Non-owners see the cluster as missing.
    if get_tag_value(cluster.tags, "Owner") != current_user.email:
        return "No such cluster: {}".format(jobflow_id), 404
    # Terminate cluster
    emr.terminate_jobflow(jobflow_id)
    # Report the state as fetched before termination.
    return render_template(
        'cluster/kill.html',
        jobflow_id=jobflow_id,
        jobflow_state=cluster.status.state,
        public_dns=getattr(cluster, "masterpublicdnsname", None),
    )
def monitor(instance_id):
    """Show basic status for a debug instance (no termination countdown)."""
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    not_found = "No such instance: {}".format(instance_id), 404
    try:
        # Fetch the actual instance.
        res = ec2.get_all_reservations(instance_ids=[instance_id])
        vm = res[0].instances[0]
    except IndexError:
        return not_found
    # Hide other people's instances behind the same 404.
    if current_user.email != vm.tags['Owner']:
        return not_found
    return render_template('monitor.html',
                           instance_id=instance_id,
                           instance_state=vm.state,
                           public_dns=vm.public_dns_name,
                           terminate_url=abs_url_for('kill',
                                                     instance_id=vm.id))
def cluster_spawn():
    """Validate the spawn form and create an EMR Spark cluster via the AWS CLI.

    On success, tags the cluster, emails the owner a monitoring link, and
    redirects to the cluster monitor page.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token', 'num_workers']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    if ' ' in request.form['name']:
        errors['name'] = "Spaces are not allowed in the cluster name."
    # FIX: replaced 'raise Exception' + bare 'except:' (which also traps
    # SystemExit/KeyboardInterrupt) with a ValueError and a narrow handler.
    try:
        n_workers = int(request.form["num_workers"])
        if n_workers <= 0 or n_workers > 20:
            raise ValueError(n_workers)
    except (TypeError, ValueError):
        errors["num_workers"] = \
            "This field should be a positive number within [1, 20]."
    # Check required file
    if not request.files['public-ssh-key']:
        errors['code'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read().rstrip()
    if not validate_public_key(pubkey):
        errors['public-ssh-key'] = \
            "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return cluster_get_params(errors, request.form)
    # Create EMR cluster: one master plus n_workers slaves, unless a
    # single-node cluster was requested.
    n_instances = n_workers if n_workers == 1 else n_workers + 1
    out = check_output([
        "aws", "emr", "create-cluster",
        "--region", app.config["AWS_REGION"],
        "--name", request.form['token'],
        "--instance-type", app.config["INSTANCE_TYPE"],
        "--instance-count", str(n_instances),
        "--service-role", "EMR_DefaultRole",
        "--ec2-attributes",
        "KeyName=mozilla_vitillo,InstanceProfile={}".format(
            app.config["SPARK_INSTANCE_PROFILE"]),
        "--release-label", app.config["EMR_RELEASE"],
        "--applications", "Name=Spark", "Name=Hive",
        "--bootstrap-actions",
        "Path=s3://{}/bootstrap/telemetry.sh,Args=[\"--public-key\",\"{}\"]".
        format(app.config["SPARK_EMR_BUCKET"], pubkey),
        "--configurations",
        "https://s3-{}.amazonaws.com/{}/configuration/configuration.json".
        format(app.config["AWS_REGION"], app.config["SPARK_EMR_BUCKET"])
    ])
    jobflow_id = json.loads(out)["ClusterId"]
    # Associate a few tags; "Owner" gates the monitor/kill views.
    emr.add_tags(
        jobflow_id, {
            "Owner": current_user.email,
            "Name": request.form['name'],
            "Application": app.config['INSTANCE_APP_TAG']
        })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('cluster_monitor', jobflow_id=jobflow_id)
    }
    ses.send_email(source=app.config['EMAIL_SOURCE'],
                   subject=("telemetry-analysis cluster: %s (%s) launched"
                            % (request.form['name'], jobflow_id)),
                   format='html',
                   body=render_template('cluster/email.html', **params),
                   to_addresses=[current_user.email])
    return redirect(url_for('cluster_monitor', jobflow_id=jobflow_id))
def cluster_spawn():
    """Validate the spawn form and launch an EMR Spark cluster via boto.

    Runs install-spark, telemetry-setup and YARN-configuration bootstrap
    actions, tags the cluster, emails the owner, and redirects to the
    monitor page.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token', 'num_workers']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    if ' ' in request.form['name']:
        errors['name'] = "Spaces are not allowed in the cluster name."
    # FIX: replaced 'raise Exception' + bare 'except:' (which also traps
    # SystemExit/KeyboardInterrupt) with a ValueError and a narrow handler.
    try:
        n_workers = int(request.form["num_workers"])
        if n_workers <= 0 or n_workers > 20:
            raise ValueError(n_workers)
    except (TypeError, ValueError):
        errors["num_workers"] = \
            "This field should be a positive number within [1, 20]."
    # Check required file
    if not request.files['public-ssh-key']:
        errors['code'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read()
    if not validate_public_key(pubkey):
        errors['public-ssh-key'] = \
            "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return cluster_get_params(errors, request.form)
    # Create EMR cluster: one master plus n_workers slaves, unless a
    # single-node cluster was requested.
    n_instances = n_workers if n_workers == 1 else n_workers + 1
    install_spark_bootstrap = BootstrapAction(
        'Install Spark',
        's3://support.elasticmapreduce/spark/install-spark',
        ['-v', app.config['SPARK_VERSION']])
    setup_telemetry_bootstrap = BootstrapAction(
        'Setup Telemetry',
        's3://{}/telemetry.sh'.format(app.config['SPARK_EMR_BUCKET']),
        ['--public-key', pubkey])
    # Disable YARN memory checks so Spark executors aren't killed.
    configure_yarn_bootstrap = BootstrapAction(
        'Configure YARN',
        's3://elasticmapreduce/bootstrap-actions/configure-hadoop',
        ['-y', 'yarn.nodemanager.vmem-check-enabled=false',
         '-y', 'yarn.nodemanager.pmem-check-enabled=false'])
    jobflow_id = emr.run_jobflow(
        name=request.form['token'],
        ec2_keyname='mozilla_vitillo',
        master_instance_type=app.config['MASTER_INSTANCE_TYPE'],
        slave_instance_type=app.config['SLAVE_INSTANCE_TYPE'],
        num_instances=n_instances,
        ami_version=app.config['AMI_VERSION'],
        service_role='EMR_DefaultRole',
        job_flow_role=app.config['SPARK_INSTANCE_PROFILE'],
        visible_to_all_users=True,
        keep_alive=True,
        bootstrap_actions=[install_spark_bootstrap,
                           setup_telemetry_bootstrap,
                           configure_yarn_bootstrap])
    # Associate a few tags; "Owner" gates the monitor/kill views.
    emr.add_tags(jobflow_id, {
        "Owner": current_user.email,
        "Name": request.form['name'],
        "Application": app.config['INSTANCE_APP_TAG']
    })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('cluster_monitor', jobflow_id=jobflow_id)
    }
    ses.send_email(
        source=app.config['EMAIL_SOURCE'],
        subject=("telemetry-analysis cluster: %s (%s) launched"
                 % (request.form['name'], jobflow_id)),
        format='html',
        body=render_template('cluster/email.html', **params),
        to_addresses=[current_user.email])
    return redirect(url_for('cluster_monitor', jobflow_id=jobflow_id))
def spawn_worker_instance():
    """Validate the worker form and launch an EC2 worker instance.

    Uploads the user's SSH public key to S3, renders the boot script,
    attaches any configured ephemeral storage, starts the instance, tags
    it, and emails the owner a monitoring link.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    # Check required file
    if not request.files['public-ssh-key']:
        errors['code-tarball'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read()
    # IDIOM: startswith accepts a tuple of prefixes -- one call instead of
    # two chained checks.
    if not pubkey.startswith(("ssh-rsa AAAAB3", "ssh-dss AAAAB3")):
        errors['public-ssh-key'] = \
            "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return get_worker_params(errors, request.form)
    # Upload s3 key to bucket
    sshkey = bucket.new_key("keys/%s.pub" % request.form['token'])
    sshkey.set_contents_from_string(pubkey)
    ephemeral = app.config.get("EPHEMERAL_MAP", None)
    # Render the user-data boot script.
    boot_script = render_template(
        'boot-script.sh',
        aws_region=app.config['AWS_REGION'],
        temporary_bucket=app.config['TEMPORARY_BUCKET'],
        ssh_key=sshkey.key,
        ephemeral_map=ephemeral)
    mapping = None
    if ephemeral:
        # Map configured ephemeral drives into the block device layout.
        mapping = BlockDeviceMapping()
        for device, eph_name in ephemeral.iteritems():
            mapping[device] = BlockDeviceType(ephemeral_name=eph_name)
    # Create EC2 instance.
    # NOTE(review): the AMI id is hard-coded -- confirm and consider
    # moving it into app.config.
    reservation = ec2.run_instances(
        image_id='ami-ace67f9c',
        security_groups=app.config['SECURITY_GROUPS'],
        user_data=boot_script,
        block_device_map=mapping,
        instance_type=app.config['INSTANCE_TYPE'],
        instance_initiated_shutdown_behavior='terminate',
        client_token=request.form['token'],
        instance_profile_name=app.config['INSTANCE_PROFILE'])
    instance = reservation.instances[0]
    # Associate a few tags; "Owner" gates the monitor/kill views.
    ec2.create_tags(
        [instance.id], {
            "Owner": current_user.email,
            "Name": request.form['name'],
            "Application": app.config['INSTANCE_APP_TAG']
        })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('monitor', instance_id=instance.id)
    }
    ses.send_email(
        source=app.config['EMAIL_SOURCE'],
        subject=("telemetry-analysis worker instance: %s (%s) launched"
                 % (request.form['name'], instance.id)),
        format='html',
        body=render_template('instance-launched-email.html', **params),
        to_addresses=[current_user.email])
    return redirect(url_for('monitor', instance_id=instance.id))
def edit_scheduled_job(job_id):
    """Display (GET) or update (POST) an existing scheduled job.

    The job must exist and belong to the logged-in user. On a valid POST,
    optionally re-uploads the code tarball, persists the changes, and
    regenerates the runner configs and crontab.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    if request.method != 'GET' and request.method != 'POST':
        return "Unsupported method: {0}".format(request.method), 405
    old_job = get_job(job_id=job_id)
    if old_job is None:
        return "No such job {0}".format(job_id), 404
    elif old_job['owner'] != current_user.email:
        return "Can't edit job {0}".format(job_id), 401
    if request.method == 'GET':
        # Show job details.
        return render_template("schedule.html", values=job_to_form(old_job))
    # else: request.method == 'POST'
    # update this job's details
    if request.form['job-id'] != job_id:
        return "Mismatched job id", 400
    new_job, job_meta, errors = validate_job_form(request, is_update=True,
                                                  old_job=old_job)
    # If there were any errors, stop and re-display the form.
    if errors:
        return render_template("schedule.html", values=request.form,
                               errors=errors)
    # Upload code if need be:
    if request.files['code-tarball']:
        err = upload_code(request.form["job-name"],
                          request.files["code-tarball"])
        if err is not None:
            errors["code-tarball"] = err
            return render_template("schedule.html", values=request.form,
                                   errors=errors)
    result = update_job(new_job)
    if result.rowcount > 0:
        print "Updated job id", job_id
    # Rebuild configs/crontab from the full job list (all owners), since
    # this job's schedule may have changed.
    jobs = []
    for j in get_jobs():
        jobs.append(j)
    update_configs(jobs)
    update_crontab(jobs)
    return render_template('schedule_create.html',
                           result=result,
                           code_s3path=job_meta["code_s3path"],
                           data_s3path=job_meta["data_s3path"],
                           commandline=new_job["commandline"],
                           output_dir=new_job["output_dir"],
                           job_frequency=job_meta["frequency"],
                           job_time=hour_to_time(new_job["schedule_hour"]),
                           job_dow=job_meta["job_dow"],
                           job_dom=job_meta["job_dom"],
                           job_timeout=new_job["timeout_minutes"],
                           cron_spec=job_meta["cron_spec"])
def is_authorized(self):
    """Whether the current user is logged in and holds the 'usealerts' grant."""
    # Short-circuit exactly as the original 'and' did: an unauthenticated
    # user's value is returned as-is without consulting the grant check.
    authed = current_user.is_authenticated()
    return authed and current_user.is_authorized('usealerts')
def cluster_spawn():
    """Launch a Spark EMR cluster for the logged-in user.

    Validates the submitted form (name, token, worker count, public SSH
    key), starts an EMR jobflow with the Spark install and telemetry setup
    bootstrap actions, tags it with the owner's email, emails the owner a
    monitoring link and redirects to the cluster monitor page.

    :returns: a redirect to ``cluster_monitor`` on success, the re-rendered
        form via ``cluster_get_params`` on validation errors, or
        ``login_manager.unauthorized()`` for unauthorized users.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token', 'num_workers']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    # Parse and range-check the worker count.  The original used a bare
    # `except:` with `raise Exception` as a goto; catch only the errors
    # int() can actually raise and do the range check explicitly instead.
    try:
        n_workers = int(request.form["num_workers"])
    except (KeyError, ValueError, TypeError):
        n_workers = None
    if n_workers is None or n_workers <= 0 or n_workers > 20:
        errors[
            "num_workers"] = "This field should be a positive number within [1, 20]."
    # Check required file
    # NOTE(review): the error is filed under 'code' rather than
    # 'public-ssh-key' - verify the template displays this key.
    if not request.files['public-ssh-key']:
        errors['code'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read()
    if not validate_public_key(pubkey):
        errors[
            'public-ssh-key'] = "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return cluster_get_params(errors, request.form)
    # Create EMR cluster: one master plus the workers, except that a
    # single-worker cluster shares one instance for master and worker.
    n_instances = n_workers if n_workers == 1 else n_workers + 1
    install_spark_bootstrap = BootstrapAction(
        'Install Spark',
        's3://support.elasticmapreduce/spark/install-spark',
        ['-v', app.config['SPARK_VERSION']])
    setup_telemetry_bootstrap = BootstrapAction(
        'Setup Telemetry',
        's3://telemetry-spark-emr/telemetry.sh',
        ['--public-key', pubkey])
    jobflow_id = emr.run_jobflow(
        name=request.form['token'],
        ec2_keyname='mozilla_vitillo',
        master_instance_type=app.config['MASTER_INSTANCE_TYPE'],
        slave_instance_type=app.config['SLAVE_INSTANCE_TYPE'],
        num_instances=n_instances,
        ami_version=app.config['AMI_VERSION'],
        service_role='EMR_DefaultRole',
        job_flow_role='telemetry-spark-emr',
        visible_to_all_users=True,
        keep_alive=True,
        bootstrap_actions=[install_spark_bootstrap, setup_telemetry_bootstrap])
    # Associate a few tags; 'Owner' is what the monitor/kill handlers
    # check against current_user.email.
    emr.add_tags(
        jobflow_id, {
            "Owner": current_user.email,
            "Name": request.form['name'],
            "Application": app.config['INSTANCE_APP_TAG']
        })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('cluster_monitor', jobflow_id=jobflow_id)
    }
    ses.send_email(source=app.config['EMAIL_SOURCE'],
                   subject=("telemetry-analysis cluster: %s (%s) launched" %
                            (request.form['name'], jobflow_id)),
                   format='html',
                   body=render_template('cluster/email.html', **params),
                   to_addresses=[current_user.email])
    return redirect(url_for('cluster_monitor', jobflow_id=jobflow_id))
def spawn_worker_instance():
    """Launch a single EC2 worker instance for the logged-in user.

    Validates the form (name, token, public SSH key), stores the key in
    S3, renders a boot script, starts the instance, tags it, emails the
    owner a monitoring link and redirects to the monitor page.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    # Check required file
    # NOTE(review): the error is filed under 'code' rather than
    # 'public-ssh-key' - verify the template displays this key.
    if not request.files['public-ssh-key']:
        errors['code'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read()
    if not validate_public_key(pubkey):
        errors['public-ssh-key'] = "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return get_worker_params(errors, request.form)
    # Upload s3 key to bucket; the boot script fetches it by this key name.
    sshkey = bucket.new_key("keys/%s.pub" % request.form['token'])
    sshkey.set_contents_from_string(pubkey)
    ephemeral = app.config.get("EPHEMERAL_MAP", None)
    # Create the user-data boot script from a template.
    boot_script = render_template('boot-script.sh',
        aws_region = app.config['AWS_REGION'],
        temporary_bucket = app.config['TEMPORARY_BUCKET'],
        ssh_key = sshkey.key,
        ephemeral_map = ephemeral
    )
    # Map ephemeral drives into the block device mapping, if configured.
    mapping = None
    if ephemeral:
        mapping = BlockDeviceMapping()
        for device, eph_name in ephemeral.iteritems():
            mapping[device] = BlockDeviceType(ephemeral_name=eph_name)
    # Create EC2 instance.  client_token makes the launch idempotent on
    # the user-supplied token; shutdown terminates (not stops) the box.
    reservation = ec2.run_instances(
        image_id = 'ami-2cfe1a1f', # ubuntu/images/hvm/ubuntu-vivid-15.04-amd64-server-20151006
        security_groups = app.config['SECURITY_GROUPS'],
        user_data = boot_script,
        block_device_map = mapping,
        instance_type = app.config['INSTANCE_TYPE'],
        instance_initiated_shutdown_behavior = 'terminate',
        client_token = request.form['token'],
        instance_profile_name = app.config['INSTANCE_PROFILE']
    )
    instance = reservation.instances[0]
    # Associate a few tags; 'Owner' is what the monitor/kill handlers
    # check against current_user.email.
    ec2.create_tags([instance.id], {
        "Owner": current_user.email,
        "Name": request.form['name'],
        "Application": app.config['INSTANCE_APP_TAG'],
        "App": app.config['ACCOUNTING_APP_TAG'],
        "Type": app.config['ACCOUNTING_TYPE_TAG']
    })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('monitor', instance_id = instance.id)
    }
    ses.send_email(
        source = app.config['EMAIL_SOURCE'],
        subject = ("telemetry-analysis worker instance: %s (%s) launched" % (request.form['name'], instance.id)),
        format = 'html',
        body = render_template('instance-launched-email.html', **params),
        to_addresses = [current_user.email]
    )
    return redirect(url_for('monitor', instance_id = instance.id))
def cluster_get_params(errors=None, values=None):
    """Render the cluster-launch form, optionally with prior errors/values.

    :param errors: mapping of field name to error message, or None.
    :param values: previously submitted form values to re-populate, or None.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    # A fresh token per rendering keeps cluster launches idempotent.
    fresh_token = str(uuid4())
    return render_template('cluster/cluster.html',
                           errors=errors,
                           values=values,
                           token=fresh_token)
def is_authorized(self):
    """Check that the current user is logged in and may use messages.

    Returns the short-circuited result of authentication and the
    'usemessages' permission check, exactly as the underlying calls do.
    """
    authenticated = current_user.is_authenticated()
    return authenticated and current_user.is_authorized('usemessages')
def cluster_spawn():
    """Launch a Spark EMR cluster for the logged-in user via the AWS CLI.

    Validates the submitted form (name, token, worker count, public SSH
    key), runs ``aws emr create-cluster`` with the telemetry bootstrap
    action, tags the resulting jobflow with the owner's email, emails the
    owner a monitoring link and redirects to the cluster monitor page.

    :returns: a redirect to ``cluster_monitor`` on success, the re-rendered
        form via ``cluster_get_params`` on validation errors, or
        ``login_manager.unauthorized()`` for unauthorized users.
    """
    # Check that the user logged in is also authorized to do this
    if not current_user.is_authorized():
        return login_manager.unauthorized()
    errors = {}
    # Check required fields
    for f in ['name', 'token', 'num_workers']:
        val = request.form[f]
        if val is None or val.strip() == '':
            errors[f] = "This field is required"
    if ' ' in request.form['name']:
        errors['name'] = "Spaces are not allowed in the cluster name."
    # Parse and range-check the worker count.  The original used a bare
    # `except:` with `raise Exception` as a goto; catch only the errors
    # int() can actually raise and do the range check explicitly instead.
    try:
        n_workers = int(request.form["num_workers"])
    except (KeyError, ValueError, TypeError):
        n_workers = None
    if n_workers is None or n_workers <= 0 or n_workers > 20:
        errors["num_workers"] = "This field should be a positive number within [1, 20]."
    # Check required file
    # NOTE(review): the error is filed under 'code' rather than
    # 'public-ssh-key' - verify the template displays this key.
    if not request.files['public-ssh-key']:
        errors['code'] = "Public key file is required"
    # Bug 961200: Check that a proper OpenSSH public key was uploaded.
    # It should start with "ssh-rsa AAAAB3"
    pubkey = request.files['public-ssh-key'].read().rstrip()
    if not validate_public_key(pubkey):
        errors['public-ssh-key'] = "Supplied file does not appear to be a valid OpenSSH public key."
    if errors:
        return cluster_get_params(errors, request.form)
    # Create EMR cluster: one master plus the workers, except that a
    # single-worker cluster shares one instance for master and worker.
    # check_output is invoked with an argument list (no shell), so the
    # user-supplied values are passed as literal arguments.
    n_instances = n_workers if n_workers == 1 else n_workers + 1
    out = check_output([
        "aws", "emr", "create-cluster",
        "--region", app.config["AWS_REGION"],
        "--name", request.form['token'],
        "--instance-type", app.config["INSTANCE_TYPE"],
        "--instance-count", str(n_instances),
        "--service-role", "EMR_DefaultRole",
        "--ec2-attributes",
        "KeyName=mozilla_vitillo,InstanceProfile={}".format(
            app.config["SPARK_INSTANCE_PROFILE"]),
        "--release-label", app.config["EMR_RELEASE"],
        "--applications", "Name=Spark", "Name=Hive",
        "--bootstrap-actions",
        "Path=s3://{}/bootstrap/telemetry.sh,Args=[\"--public-key\",\"{}\"]".format(
            app.config["SPARK_EMR_BUCKET"], pubkey),
        "--configurations",
        "https://s3-{}.amazonaws.com/{}/configuration/configuration.json".format(
            app.config["AWS_REGION"], app.config["SPARK_EMR_BUCKET"])])
    jobflow_id = json.loads(out)["ClusterId"]
    # Associate a few tags; 'Owner' is what the monitor/kill handlers
    # check against current_user.email.
    emr.add_tags(jobflow_id, {
        "Owner": current_user.email,
        "Name": request.form['name'],
        "Application": app.config['INSTANCE_APP_TAG'],
        "App": app.config['ACCOUNTING_APP_TAG'],
        "Type": app.config['ACCOUNTING_TYPE_TAG']
    })
    # Send an email to the user who launched it
    params = {
        'monitoring_url': abs_url_for('cluster_monitor', jobflow_id = jobflow_id)
    }
    ses.send_email(
        source = app.config['EMAIL_SOURCE'],
        subject = ("telemetry-analysis cluster: %s (%s) launched" % (request.form['name'], jobflow_id)),
        format = 'html',
        body = render_template('cluster/email.html', **params),
        to_addresses = [current_user.email]
    )
    return redirect(url_for('cluster_monitor', jobflow_id = jobflow_id))