def check_loading_cache(params, output_loc, v_type):

    # Compute hash of the passed params
    try:
        param_hash = get_param_hash(params)
    except Exception as e:
        from utils import save_error
        save_error('Error caching passed params', output_loc, e)

    # Check if params are cached
    if os.path.exists(os.path.join(LOADING_CACHE_DR, param_hash)):

        # Copy saved results / output to the output_loc
        saved_loc = os.path.join(LOADING_CACHE_DR, param_hash)
        copyfile(saved_loc, output_loc)

        # Increment the input cache
        incr_input_cache(v_type, params)

        # Exit the script, as everything is done
        sys.exit()

    # Otherwise, just return the param hash
    # to be used to eventually save the output
    return param_hash
def check_loaded_cache(params, output_loc, user_dr, n):

    # Compute hash of the passed params
    try:
        param_hash = get_param_hash(params)
    except Exception as e:
        from utils import save_error
        save_error('Error caching passed params', output_loc, e)

    # Check if params are already cached
    saved_loc = os.path.join(LOADED_CACHE_DR, param_hash)
    if os.path.exists(saved_loc):

        # If it exists, load the saved ML object, overriding the old cache info
        ML = Load(saved_loc,
                  exp_name='ML_Logs_' + str(n),
                  log_dr=os.path.join(user_dr, 'temp'),
                  existing_log='overwrite')

        # Update timestamp
        Path(saved_loc).touch()

        return ML

    # If not cached, return the param_hash
    return param_hash
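# Both cache helpers above rely on get_param_hash to turn a params dict into a
# stable cache key used as a file name. The helper itself is not shown in this
# section; the sketch below is an illustrative assumption of how such a hash
# could be computed (JSON-serialize the params with sorted keys, then take a
# hex digest), not the repo's actual implementation.
import hashlib
import json


def get_param_hash(params):

    # Serialize deterministically so the same params always map
    # to the same cache key
    as_str = json.dumps(params, sort_keys=True, default=str)

    # Hash the serialized string into a filesystem-friendly name
    return hashlib.md5(as_str.encode('utf-8')).hexdigest()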
def main(user_dr, n):

    temp_dr = os.path.join(user_dr, 'temp')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + str(n) + '.json')

    # Load in params
    params = load_params(user_dr, output_loc, n)

    # Base apply test
    ML = base_test_load(params, user_dr, output_loc, n)
    ML._print('loaded')

    log_dr = os.path.join(temp_dr, 'ML_Logs_' + str(n))

    # Create output results
    output = {}
    try:
        output['html_output'], output['html_table'] =\
            get_test_output_from_logs(log_dr)
    except Exception as e:
        save_error('Problem getting output', output_loc, e)
    output['status'] = 1

    # Save results
    save_results(output_loc, output)
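# The job scripts in this section all call two shared helpers from utils:
# save_error and save_results. Several call sites use variables set in a failed
# try block without any guard afterwards, which suggests save_error records the
# failure and terminates the script. Their real implementations are not shown
# here; the sketch below is a minimal version under that assumption, matching
# the (message, output_loc, optional exception) call pattern used in these
# scripts.
import json
import sys
import traceback


def save_error(message, output_loc, e=None):

    # Record the failure as the job's JSON output, then stop the script
    payload = {'status': 0, 'error': message}
    if e is not None:
        payload['traceback'] = ''.join(
            traceback.format_exception(type(e), e, e.__traceback__))

    with open(output_loc, 'w') as f:
        json.dump(payload, f)

    sys.exit(1)


def save_results(output_loc, output):

    # Persist the finished output dict as JSON for the front-end to read
    with open(output_loc, 'w') as f:
        json.dump(output, f)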
def main(user_dr, job_name):

    temp_dr = os.path.join(user_dr, 'temp')
    params_loc = os.path.join(temp_dr, 'ML_Params_' + str(job_name) + '.json')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + str(job_name) + '.json')

    with open(params_loc, 'r') as f:
        params = json.load(f)['params']

    project_dr = os.path.join(user_dr, 'Jobs', params['project_id'])
    job_dr = os.path.join(project_dr, job_name)
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')

    try:
        with open(preds_loc, 'rb') as f:
            preds = pkl.load(f)
    except Exception as e:
        preds = None
        save_error('Error loading raw preds', output_loc, e)

    try:
        raw_preds, pred_rows = get_raw_preds_table_html(preds)
    except Exception as e:
        raw_preds, pred_rows = None, None
        save_error('Error generating raw preds table', output_loc, e)

    output = {'raw_preds': raw_preds, 'pred_rows': pred_rows}
    save_results(output_loc, output)
def main(user_dr, job_name):

    # Init setup
    params, job_dr, error_output_loc = run_setup(user_dr, job_name)

    # Get base job submission / overlap with test
    model_pipeline, problem_spec, ML =\
        base_run(params, job_dr, error_output_loc, job_name)

    # Run Test
    try:
        results = ML.Test(model_pipeline=model_pipeline,
                          problem_spec=problem_spec,
                          train_subjects='train',
                          test_subjects='test',
                          return_raw_preds=True)
        results['scorer_strs'] = ML.evaluator.scorer_strs
        results['n_repeats'], results['n_splits'] = 1, 1
    except Exception as e:
        results = None
        save_error('Error starting Test', error_output_loc, e)

    # Save raw_preds separately
    raw_preds = results.pop('raw_preds')

    # Save results
    results_loc = os.path.join(job_dr, 'results.pkl')
    with open(results_loc, 'wb') as f:
        pkl.dump(results, f)

    # Save raw preds
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')
    with open(preds_loc, 'wb') as f:
        pkl.dump(raw_preds, f)
def main(user_dr, job_name):

    # Init setup
    params, job_dr, error_output_loc = run_setup(user_dr, job_name)

    # Get base job submission / overlap with test
    model_pipeline, problem_spec, ML =\
        base_run(params, job_dr, error_output_loc, job_name)

    # Get the Evaluate CV + splits info
    try:
        splits, n_repeats, CV =\
            get_splits_CV(params['eval_params'], error_output_loc,
                          ML.strat_u_name)
    except Exception as e:
        splits, n_repeats, CV = None, None, None
        save_error('Error parsing evaluate params', error_output_loc, e)

    # Run Evaluate
    try:
        results = ML.Evaluate(model_pipeline, problem_spec,
                              splits=splits,
                              n_repeats=n_repeats,
                              CV=CV,
                              train_subjects='train',
                              return_raw_preds=True)
        results['scorer_strs'] = ML.evaluator.scorer_strs
        results['n_repeats'] = n_repeats
        results['n_splits'] = ML.evaluator.n_splits_
    except Exception as e:
        results = None
        save_error('Error starting Evaluate', error_output_loc, e)

    # Save raw_preds separately
    raw_preds = results.pop('raw_preds')

    # Save results
    results_loc = os.path.join(job_dr, 'results.pkl')
    with open(results_loc, 'wb') as f:
        pkl.dump(results, f)

    # Save raw preds
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')
    with open(preds_loc, 'wb') as f:
        pkl.dump(raw_preds, f)
def main(user_dr, n):

    temp_dr = os.path.join(user_dr, 'temp')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + str(n) + '.json')

    # If existing output, remove
    if os.path.exists(output_loc):
        os.remove(output_loc)

    # Load in params
    params = load_params(user_dr, output_loc, n)

    # Load all data first
    loading_params = params['loading_params']
    try:
        ML = load_all_data(loading_params, output_loc, user_dr, n)
    except Exception as e:
        ML = None
        save_error('Error loading data', output_loc, e)

    log_dr = os.path.join(temp_dr, 'ML_Logs_' + str(n))

    # Get the CV param object
    try:
        cv_params = get_CV_from_params(params['val_params'], output_loc,
                                       ML.strat_u_name)
    except Exception as e:
        cv_params = None
        save_error('Error creating CV params', output_loc, e)

    # Obtain the CV splits from the BPt object
    try:
        cv, df = ML._get_CV(cv_params, show=True,
                            show_original=True, return_df=True)
    except Exception as e:
        save_error('Error generating CV splits info', output_loc, e)

    # Create output results
    try:
        output = {}
        output['html_output'], output['html_table'] =\
            get_val_output_from_logs(log_dr, df)
        output['status'] = 1
    except Exception as e:
        output = None
        save_error('Error extracting CV info table', output_loc, e)

    # Save results
    save_results(output_loc, output)
def base_test_load(params, user_dr, output_loc, n):

    # Load all data first
    loading_params = params['loading_params']
    try:
        ML = load_all_data(loading_params, output_loc, user_dr, n)
    except Exception as e:
        save_error('Error loading data', output_loc, e)

    ML._print('Loaded All Data.')

    # Apply the test split
    try:
        apply_test_split(params['test_params'], ML, output_loc)
    except Exception as e:
        save_error('Error applying test split', output_loc, e)

    ML._print('Performed test split.')

    return ML
def wrapper(*args, **kwargs) -> Any:

    sleep_time = get_global('SLEEP_TIME') * sleep_multiplier
    message: List[Union[str, Tuple]] = []

    for n in range(max_tries):
        try:
            return func(*args, **kwargs)
        except (MyProxyError, ProxyError, ConnectionError,
                OSError, Timeout) as e:

            # message = [f'Exception in {func.__name__}:',
            #            str(e), exc_info(), f'{n+1} try of {max_tries}']
            message = [str(e)]

            if not silent:
                if sleep_time:
                    message += [f'next try in {sleep_time} seconds']
                log(*message, sep='\n')

            if sleep_time:
                sleep(sleep_time)

    if save and message:
        save_error(*message, sep='\n')

    if quit:
        exit(1)
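# The wrapper above reads func, max_tries, sleep_multiplier, silent, save and
# quit from an enclosing scope, so it is presumably the inner function of a
# retry decorator factory. The sketch below shows one plausible shape for that
# outer layer; the parameter defaults and the functools.wraps usage are
# assumptions for illustration, not the repo's actual code.
from functools import wraps
from typing import Any, Callable


def retry(max_tries: int = 3, sleep_multiplier: float = 1.0,
          silent: bool = False, save: bool = False,
          quit: bool = False) -> Callable:

    def decorator(func: Callable) -> Callable:

        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # ... retry loop as defined in the wrapper above ...
            return func(*args, **kwargs)

        return wrapper

    return decorator


# Hypothetical usage:
# @retry(max_tries=5, sleep_multiplier=2)
# def fetch(url): ...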
def main(user_dr, job_name):

    temp_dr = os.path.join(user_dr, 'temp')
    params_loc = os.path.join(temp_dr, 'ML_Params_' + str(job_name) + '.json')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + str(job_name) + '.json')

    with open(params_loc, 'r') as f:
        params = json.load(f)['params']

    project_dr = os.path.join(user_dr, 'Jobs', params['project_id'])
    job_dr = os.path.join(project_dr, job_name)
    results_loc = os.path.join(job_dr, 'results.pkl')

    # Wait for the results file to appear, retrying up to 10 times
    retry_cnt = 0
    while retry_cnt < 10:
        try:
            with open(results_loc, 'rb') as f:
                results = pkl.load(f)

            # Mark success by jumping past the retry limit
            retry_cnt = 20
        except FileNotFoundError:
            time.sleep(1)
            retry_cnt += 1

    # If the success sentinel was never set, the results never showed up
    if retry_cnt != 20:
        results = None
        save_error('Error reading saved results', output_loc)

    try:
        table_html = get_table_html(results)
    except Exception as e:
        save_error('Error generating summary table', output_loc, e)

    output = {'table_html': table_html}
    save_results(output_loc, output)
def get_show(params, ML, save_loc, output_loc):

    show_params = params['show_params']

    if show_params['source'] in ['Data Variable', 'Set Variable']:
        try:
            display_df = ML.Show_Covars_Dist(covars=show_params['name'],
                                             subjects='both',
                                             show=False,
                                             cat_type='Frequency',
                                             return_display_dfs=True)
        except Exception as e:
            save_error('Error creating variable dist', output_loc, e)

    elif show_params['source'] == 'Target':
        try:
            display_df = ML.Show_Targets_Dist(targets=show_params['name'],
                                              subjects='both',
                                              show=False,
                                              cat_type='Frequency',
                                              return_display_dfs=True)
        except Exception as e:
            save_error('Error creating target dist', output_loc, e)

    elif show_params['source'] == 'Non-Input Variable':
        try:
            to_dist = show_params['name'].replace(ML.strat_u_name, '')
            display_df = ML.Show_Strat_Dist(strat=to_dist,
                                            subjects='both',
                                            show=False,
                                            cat_type='Frequency',
                                            return_display_dfs=True)
        except Exception as e:
            save_error('Error creating non-input dist', output_loc, e)

    elif show_params['source'] == 'Set':
        try:
            ML._print(show_params['name'], type(show_params['name']))
            display_df = None

            # Generate the distribution animation
            ML.notebook = True
            anim = ML.Show_Data_Dist(data_subset=show_params['name'],
                                     subjects='both',
                                     return_anim=True)
            ML.notebook = False

            # Save the animation as an mp4 via ffmpeg
            import matplotlib.animation as animation
            Writer = animation.writers['ffmpeg']
            writer = Writer(fps=2)

            save_loc = save_loc.replace('.png', '.mp4')
            anim.save(save_loc, dpi=ML.dpi, writer=writer)

            # Save results directly and return early, as there is no table
            output = {}
            output['html_output'] = ''
            output['html_table'] = ''
            output['img_loc'] = save_loc
            output['status'] = 1
            save_results(output_loc, output)
            return

        except Exception as e:
            save_error('Error creating distribution video', output_loc, e)

    # For the static cases, save the current figure and convert
    # the display dataframe to a table
    plt.savefig(save_loc, dpi=ML.dpi, bbox_inches='tight')
    table = df_to_table(display_df)

    return save_loc, table