import io
import os
import tempfile
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from multiprocessing import Pool

import boto3
import pandas as pd

# Project-internal helpers used below (MorfJobConfig, make_s3_key_path, download_from_s3,
# initialize_input_output_dirs, etc.) are assumed to be imported elsewhere in this package.

mode = "cv"  # assumed module-level constant; the docker image is executed with mode == "cv"


def cross_validate_session(label_type, k=5, multithread=True, raw_data_dir="morf-data/"):
    """
    Compute k-fold cross-validation across sessions.

    :param label_type: type of outcome label to use (string).
    :param k: number of cross-validation folds (int).
    :param multithread: if True, use up to job_config.max_num_cores workers (bool).
    :param raw_data_dir: path to the raw data directory within each S3 bucket (string).
    :return: None.
    """
    raise NotImplementedError  # this is not implemented! the body below is a sketch
    # todo: call to create_session_folds() goes here
    job_config = MorfJobConfig(CONFIG_FILENAME)
    job_config.update_mode(mode)
    logger = set_logger_handlers(module_logger, job_config)
    # clear any preexisting data for this user/job/mode
    # clear_s3_subdirectory(job_config)
    if multithread:
        num_cores = job_config.max_num_cores
    else:
        num_cores = 1
    logger.info("conducting cross validation")
    with Pool(num_cores) as pool:
        for raw_data_bucket in job_config.raw_data_buckets:
            for course in fetch_complete_courses(job_config, raw_data_bucket):
                for session in fetch_sessions(job_config, raw_data_bucket, data_dir=raw_data_dir,
                                              course=course, fetch_all_sessions=True):
                    for fold_num in range(1, k + 1):
                        with tempfile.TemporaryDirectory(dir=job_config.local_working_directory) as working_dir:
                            input_dir, output_dir = initialize_input_output_dirs(working_dir)
                            session_input_dir = os.path.join(input_dir, course, session)
                            session_output_dir = os.path.join(output_dir, course, session)
                            # get fold train data
                            trainkey = make_s3_key_path(job_config, course,
                                                        make_feature_csv_name(course, session, fold_num, "train"),
                                                        session)
                            train_data_path = download_from_s3(job_config.proc_data_bucket, trainkey,
                                                               job_config.initialize_s3(),
                                                               dir=session_input_dir, job_config=job_config)
                            # get fold test data
                            testkey = make_s3_key_path(job_config, course,
                                                       make_feature_csv_name(course, session, fold_num, "test"),
                                                       session)
                            test_data_path = download_from_s3(job_config.proc_data_bucket, testkey,
                                                              job_config.initialize_s3(),
                                                              dir=session_input_dir, job_config=job_config)
                            # get labels
                            initialize_labels(job_config, raw_data_bucket, course, session, label_type,
                                              session_input_dir, raw_data_dir)
                            # todo: run docker image with mode == cv
                            # todo: upload results
        pool.close()
        pool.join()
    return
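
# A minimal sketch of how the two "todo" steps above might be dispatched through the
# pool, mirroring the course-level flow in execute_image_for_cv below. The helper name
# execute_session_fold and its argument list are assumptions for illustration only,
# not part of the current API.
def _sketch_dispatch_session_folds(job_config, pool, raw_data_bucket, course, session, label_type, k):
    """Hypothetical: queue one docker run per fold, then collect the async results."""
    async_results = []
    for fold_num in range(1, k + 1):
        # each worker would load the image, run it with mode == "cv",
        # and upload that fold's predictions to S3
        res = pool.apply_async(execute_session_fold,
                               [job_config, raw_data_bucket, course, session, label_type, fold_num])
        async_results.append(res)
    # .get() re-raises any exception that occurred in a worker
    return [r.get() for r in async_results]
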
def send_success_email(job_config, emailaddr_from="*****@*****.**"):
    """
    Send an email alert with an attachment.

    Modified substantially from:
    http://blog.vero4ka.info/blog/2016/10/26/how-to-send-an-email-with-attachment-via-amazon-ses-in-python/
    https://gist.github.com/yosemitebandit/2883593

    :param job_config: MorfJobConfig object.
    :param emailaddr_from: address to send email from (string).
    :return: None.
    """
    aws_access_key_id = job_config.aws_access_key_id
    aws_secret_access_key = job_config.aws_secret_access_key
    proc_data_bucket = job_config.proc_data_bucket
    job_id = job_config.job_id
    user_id = job_config.user_id
    emailaddr_to = job_config.email_to
    status = job_config.status
    # set mode so that the correct key path is used to fetch results
    job_config.update_mode("test")
    results_file_name = "morf-results.csv"
    s3 = boto3.client("s3", aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key)
    # fetch model evaluation results
    attachment_basename = generate_archive_filename(job_config, mode="evaluate", extension="csv")
    key = make_s3_key_path(job_config, filename=attachment_basename)
    attachment_filepath = download_from_s3(proc_data_bucket, key, s3)
    with open(attachment_filepath) as f:
        data = f.read()
    output = io.StringIO(data)
    # build the email
    subject_text = construct_message_subject(job_config)
    msg = MIMEMultipart()
    msg["Subject"] = subject_text
    msg["From"] = emailaddr_from
    msg["To"] = emailaddr_to
    # what a recipient sees if they don't use a MIME-aware email reader
    msg.preamble = "Multipart message.\n"
    # the body
    body_text = construct_message_body(job_config)
    body = MIMEText(body_text)
    msg.attach(body)
    # the attachment
    part = MIMEApplication(output.getvalue())
    part.add_header("Content-Disposition", "attachment", filename=results_file_name)
    part.add_header("Content-Type", "application/vnd.ms-excel; charset=UTF-8")
    msg.attach(part)
    # connect to Amazon SES
    ses = boto3.client("ses", region_name="us-east-1",
                       aws_access_key_id=aws_access_key_id,
                       aws_secret_access_key=aws_secret_access_key)
    # and finally, send the email
    try:
        ses.send_raw_email(Source=emailaddr_from,
                           Destinations=[emailaddr_to, emailaddr_from],
                           RawMessage={"Data": msg.as_string()})
        print("[INFO] email notification sent to {}".format(emailaddr_to))
    except Exception as e:
        print("[WARNING] error sending email to {}: {}".format(emailaddr_to, e))
    return
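
# Hedged usage sketch: the default from-address above is masked in the source, and SES
# rejects unverified senders, so a verified identity must be passed explicitly. The
# placeholder address and the use of CONFIG_FILENAME here are assumptions, following
# the same MorfJobConfig construction pattern as cross_validate_session above.
#
#     cfg = MorfJobConfig(CONFIG_FILENAME)
#     send_success_email(cfg, emailaddr_from="alerts@example.org")  # placeholder sender
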
def execute_image_for_cv(job_config, raw_data_bucket, course, fold_num, docker_image_dir, label_type,
                         raw_data_dir="morf-data/"):
    """
    Run a docker image in cross-validation mode for a single course and fold, then upload its predictions.

    :param job_config: MorfJobConfig object.
    :param raw_data_bucket: S3 bucket containing the raw course data (string).
    :param course: course identifier (string).
    :param fold_num: cross-validation fold number (int).
    :param docker_image_dir: local directory containing the docker image to load (string).
    :param label_type: type of outcome label to use (string).
    :param raw_data_dir: path to the raw data directory within the S3 bucket (string).
    :return: None.
    """
    user_id_col = "userID"
    logger = set_logger_handlers(module_logger, job_config)
    with tempfile.TemporaryDirectory(dir=job_config.local_working_directory) as working_dir:
        input_dir, output_dir = initialize_input_output_dirs(working_dir)
        # get fold train data
        course_input_dir = os.path.join(input_dir, course)
        trainkey = make_s3_key_path(job_config, course, make_feature_csv_name(course, fold_num, "train"))
        train_data_path = download_from_s3(job_config.proc_data_bucket, trainkey, job_config.initialize_s3(),
                                           dir=course_input_dir, job_config=job_config)
        # get fold test data
        testkey = make_s3_key_path(job_config, course, make_feature_csv_name(course, fold_num, "test"))
        test_data_path = download_from_s3(job_config.proc_data_bucket, testkey, job_config.initialize_s3(),
                                          dir=course_input_dir, job_config=job_config)
        # get labels for the users in the training fold
        train_users = pd.read_csv(train_data_path)[user_id_col]
        train_labels_path = initialize_cv_labels(job_config, train_users, raw_data_bucket, course, label_type,
                                                 input_dir, raw_data_dir, fold_num, "train", level="course")
        # run docker image with mode == cv
        image_uuid = load_docker_image(docker_image_dir, job_config, logger)
        cmd = make_docker_run_command(job_config, job_config.docker_exec, input_dir, output_dir, image_uuid,
                                      course, None, mode, job_config.client_args) + \
              " --fold_num {}".format(fold_num)
        execute_and_log_output(cmd, logger)
        # upload results
        pred_csv = os.path.join(output_dir, "{}_{}_test.csv".format(course, fold_num))
        pred_key = make_s3_key_path(job_config, course, os.path.basename(pred_csv), mode="test")
        upload_file_to_s3(pred_csv, job_config.proc_data_bucket, pred_key, job_config,
                          remove_on_success=True)
    return
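
# Hedged illustration of the contract implied above: " --fold_num {n}" is appended to
# the docker run command, so the client image's entrypoint is expected to accept that
# flag. The entrypoint below is a hypothetical example, not part of MORF; it only
# demonstrates flag parsing that tolerates whatever job_config.client_args passes through.
def _example_cv_entrypoint(argv=None):
    """Hypothetical client-image entrypoint showing how --fold_num could be consumed."""
    import argparse
    parser = argparse.ArgumentParser(description="hypothetical client-image entrypoint")
    parser.add_argument("--fold_num", type=int, default=None)  # appended by execute_image_for_cv
    # parse_known_args ignores unrecognized client_args instead of erroring out
    args, _unknown = parser.parse_known_args(argv)
    return args
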