Esempio n. 1
0
def check_loading_cache(params, output_loc, v_type):
    """Serve a request from the loading cache when a cached copy exists.

    The passed params are hashed with ``get_param_hash``. If a file named
    after that hash exists under ``LOADING_CACHE_DR``, it is copied to
    ``output_loc``, ``incr_input_cache(v_type, params)`` is called and the
    process is terminated with ``sys.exit()``.

    Parameters
    ----------
    params
        Object handed to ``get_param_hash`` (and to ``incr_input_cache``).
    output_loc
        Destination path for the cached file; also passed to ``save_error``
        when hashing fails.
    v_type
        Forwarded unchanged to ``incr_input_cache``.

    Returns
    -------
    The param hash, returned only when nothing is cached, so the caller can
    use it to eventually save its output.
    """
    try:
        param_hash = get_param_hash(params)
    except Exception as e:
        from utils import save_error
        save_error('Error caching passed params', output_loc, e)

    # NOTE(review): if save_error returns instead of ending the process,
    # param_hash is unbound from here on -- confirm save_error terminates.
    cached_file = os.path.join(LOADING_CACHE_DR, param_hash)

    # Cache miss: hand the hash back to the caller
    if not os.path.exists(cached_file):
        return param_hash

    # Cache hit: copy the saved output into place
    copyfile(cached_file, output_loc)

    # Record the hit in the input cache
    incr_input_cache(v_type, params)

    # Nothing left to do for this run
    sys.exit()
Esempio n. 2
0
def check_loaded_cache(params, output_loc, user_dr, n):
    """Return a cached ML object for ``params`` if one exists.

    The params are hashed with ``get_param_hash`` and looked up as a path
    under ``LOADED_CACHE_DR``.

    Parameters
    ----------
    params
        Object handed to ``get_param_hash``.
    output_loc
        Passed to ``save_error`` when hashing fails.
    user_dr
        User directory; its ``temp`` sub-directory is used as ``log_dr``.
    n
        Suffix appended to ``'ML_Logs_'`` to build the experiment name.

    Returns
    -------
    The object returned by ``Load`` when the cache entry exists, otherwise
    the param hash.
    """
    try:
        param_hash = get_param_hash(params)
    except Exception as e:
        from utils import save_error
        save_error('Error caching passed params', output_loc, e)

    # NOTE(review): param_hash is unbound here if save_error returns --
    # confirm that save_error terminates the process.
    cached_path = os.path.join(LOADED_CACHE_DR, param_hash)

    # Not cached: the caller gets the hash instead of an ML object
    if not os.path.exists(cached_path):
        return param_hash

    # Cached: load the saved ML object, overwriting any existing log
    exp_name = 'ML_Logs_' + str(n)
    log_folder = os.path.join(user_dr, 'temp')
    ML = Load(cached_path,
              exp_name=exp_name,
              log_dr=log_folder,
              existing_log='overwrite')

    # Refresh the cache entry's timestamp
    Path(cached_path).touch()

    return ML
Esempio n. 3
0
def main(user_dr, n):
    """Load data, apply the test split and save the resulting log output.

    Parameters
    ----------
    user_dr
        User directory; outputs and logs live in its ``temp`` sub-directory.
    n
        Identifier used to build the ``ML_Output_<n>.json`` and
        ``ML_Logs_<n>`` names.
    """
    temp_dr = os.path.join(user_dr, 'temp')
    output_loc = os.path.join(temp_dr, f'ML_Output_{n!s}.json')

    # Read the params for this run
    params = load_params(user_dr, output_loc, n)

    # Load everything and apply the base test split
    ML = base_test_load(params, user_dr, output_loc, n)
    ML._print('loaded')

    log_dr = os.path.join(temp_dr, f'ML_Logs_{n!s}')

    # Assemble the output payload
    output = {}
    try:
        html_output, html_table = get_test_output_from_logs(log_dr)
        output.update(html_output=html_output, html_table=html_table)
    except Exception as e:
        save_error('Problem getting output', output_loc, e)

    # The status flag is set regardless of whether the block above succeeded
    output['status'] = 1

    # Write the results out
    save_results(output_loc, output)
Esempio n. 4
0
def main(user_dr, job_name):
    """Build the raw-predictions HTML table for a job and save it.

    Reads ``ML_Params_<job_name>.json`` from the user's ``temp`` directory
    to find the project id, unpickles ``raw_preds.pkl`` from the job
    directory, converts it with ``get_raw_preds_table_html`` and writes the
    result to ``ML_Output_<job_name>.json`` via ``save_results``.

    Parameters
    ----------
    user_dr
        User directory containing ``temp`` and ``Jobs``.
    job_name
        Name of the job; used both in file names and as the job directory.
    """
    temp_dr = os.path.join(user_dr, 'temp')
    job_tag = str(job_name)
    params_loc = os.path.join(temp_dr, 'ML_Params_' + job_tag + '.json')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + job_tag + '.json')

    with open(params_loc, 'r') as params_file:
        params = json.load(params_file)['params']

    # <user_dr>/Jobs/<project_id>/<job_name>/raw_preds.pkl
    job_dr = os.path.join(os.path.join(user_dr, 'Jobs', params['project_id']),
                          job_name)
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')

    # Fallback used when the pickle cannot be read
    preds = None
    try:
        with open(preds_loc, 'rb') as preds_file:
            preds = pkl.load(preds_file)
    except Exception as e:
        save_error('Error loading raw preds', output_loc, e)

    # Fallbacks used when the table cannot be generated
    raw_preds, pred_rows = None, None
    try:
        raw_preds, pred_rows = get_raw_preds_table_html(preds)
    except Exception as e:
        save_error('Error generating raw preds table', output_loc, e)

    save_results(output_loc, {'raw_preds': raw_preds, 'pred_rows': pred_rows})
Esempio n. 5
0
def main(user_dr, job_name):
    """Run the Test step for a job and pickle its results.

    After ``run_setup`` and ``base_run``, ``ML.Test`` is called on the
    'train' / 'test' subjects with raw predictions requested. The results
    (minus the raw predictions) are pickled to ``results.pkl`` and the raw
    predictions to ``raw_preds.pkl``, both inside the job directory.

    Parameters
    ----------
    user_dr
        User directory, forwarded to ``run_setup``.
    job_name
        Name of the job, forwarded to ``run_setup`` and ``base_run``.

    Returns
    -------
    None. If the Test step fails, the error is reported through
    ``save_error`` and nothing is written to the job directory.
    """
    # Init setup
    params, job_dr, error_output_loc = run_setup(user_dr, job_name)

    # Get base job submission / overlap with test
    model_pipeline, problem_spec, ML =\
        base_run(params, job_dr, error_output_loc, job_name)

    # Run Test
    try:
        results = ML.Test(model_pipeline=model_pipeline,
                          problem_spec=problem_spec,
                          train_subjects='train',
                          test_subjects='test',
                          return_raw_preds=True)
        results['scorer_strs'] = ML.evaluator.scorer_strs
        results['n_repeats'], results['n_splits'] = 1, 1

    except Exception as e:
        save_error('Error starting Test', error_output_loc, e)

        # There are no results to save. Without this return, a save_error
        # that hands control back would lead to `None.pop(...)` below and
        # an unrelated AttributeError masking the real failure.
        return

    # Save raw_preds separately from the rest of the results
    raw_preds = results.pop('raw_preds')

    # Save results
    results_loc = os.path.join(job_dr, 'results.pkl')
    with open(results_loc, 'wb') as f:
        pkl.dump(results, f)

    # Save raw preds
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')
    with open(preds_loc, 'wb') as f:
        pkl.dump(raw_preds, f)
Esempio n. 6
0
def main(user_dr, job_name):
    """Run the Evaluate step for a job and pickle its results.

    After ``run_setup`` and ``base_run``, the CV / splits settings are
    parsed from ``params['eval_params']`` and ``ML.Evaluate`` is called on
    the 'train' subjects with raw predictions requested. The results
    (minus the raw predictions) are pickled to ``results.pkl`` and the raw
    predictions to ``raw_preds.pkl``, both inside the job directory.

    Parameters
    ----------
    user_dr
        User directory, forwarded to ``run_setup``.
    job_name
        Name of the job, forwarded to ``run_setup`` and ``base_run``.

    Returns
    -------
    None. If the Evaluate step fails, the error is reported through
    ``save_error`` and nothing is written to the job directory.
    """
    # Init setup
    params, job_dr, error_output_loc = run_setup(user_dr, job_name)

    # Get base job submission / overlap with test
    model_pipeline, problem_spec, ML =\
        base_run(params, job_dr, error_output_loc, job_name)

    # Get the Evaluate CV + splits info
    try:
        splits, n_repeats, CV =\
            get_splits_CV(params['eval_params'], error_output_loc,
                          ML.strat_u_name)
    except Exception as e:
        splits, n_repeats, CV = None, None, None
        save_error('Error parsing evaluate params', error_output_loc, e)

    # Run Evaluate
    try:
        results = ML.Evaluate(model_pipeline,
                              problem_spec,
                              splits=splits,
                              n_repeats=n_repeats,
                              CV=CV,
                              train_subjects='train',
                              return_raw_preds=True)
        results['scorer_strs'] = ML.evaluator.scorer_strs
        results['n_repeats'] = n_repeats
        results['n_splits'] = ML.evaluator.n_splits_
    except Exception as e:
        save_error('Error starting Evaluate', error_output_loc, e)

        # There are no results to save. Without this return, a save_error
        # that hands control back would lead to `None.pop(...)` below and
        # an unrelated AttributeError masking the real failure.
        return

    # Save raw_preds separately from the rest of the results
    raw_preds = results.pop('raw_preds')

    # Save results
    results_loc = os.path.join(job_dr, 'results.pkl')
    with open(results_loc, 'wb') as f:
        pkl.dump(results, f)

    # Save raw preds
    preds_loc = os.path.join(job_dr, 'raw_preds.pkl')
    with open(preds_loc, 'wb') as f:
        pkl.dump(raw_preds, f)
Esempio n. 7
0
def main(user_dr, n):
    """Generate and save the CV-split information for a validation setup.

    Any previous output file is removed, the data is loaded, a CV param
    object is built from ``params['val_params']``, the splits are obtained
    from ``ML._get_CV`` and the resulting log / table output is written
    with ``save_results``. Each stage reports failures via ``save_error``.

    Parameters
    ----------
    user_dr
        User directory; outputs and logs live in its ``temp`` sub-directory.
    n
        Identifier used to build the ``ML_Output_<n>.json`` and
        ``ML_Logs_<n>`` names.
    """
    temp_dr = os.path.join(user_dr, 'temp')
    output_loc = os.path.join(temp_dr, f'ML_Output_{n!s}.json')

    # Clear out any stale output from an earlier run
    if os.path.exists(output_loc):
        os.remove(output_loc)

    params = load_params(user_dr, output_loc, n)

    # All data must be loaded before CV info can be computed
    loading_params = params['loading_params']

    ML = None
    try:
        ML = load_all_data(loading_params, output_loc, user_dr, n)
    except Exception as e:
        save_error('Error loading data', output_loc, e)

    log_dr = os.path.join(temp_dr, f'ML_Logs_{n!s}')

    # Build the CV param object
    cv_params = None
    try:
        cv_params = get_CV_from_params(params['val_params'], output_loc,
                                       ML.strat_u_name)
    except Exception as e:
        save_error('Error creating CV params', output_loc, e)

    # Ask the BPt object for the CV splits (only the df is used below)
    try:
        _, df = ML._get_CV(cv_params,
                           show=True,
                           show_original=True,
                           return_df=True)
    except Exception as e:
        save_error('Error generating CV splits info', output_loc, e)

    # Build the output payload. `df` is referenced inside the try on
    # purpose: it is unbound when the previous step failed.
    output = None
    try:
        html_output, html_table = get_val_output_from_logs(log_dr, df)
        output = {'html_output': html_output,
                  'html_table': html_table,
                  'status': 1}
    except Exception as e:
        save_error('Error extracting CV info table', output_loc, e)

    save_results(output_loc, output)
Esempio n. 8
0
def base_test_load(params, user_dr, output_loc, n):
    """Load all data and apply the test split, returning the ML object.

    Parameters
    ----------
    params
        Mapping with at least the ``'loading_params'`` and
        ``'test_params'`` keys.
    user_dr, n
        Forwarded to ``load_all_data``.
    output_loc
        Forwarded to ``load_all_data`` / ``apply_test_split`` and used as
        the target for ``save_error``.

    Returns
    -------
    The object returned by ``load_all_data``, after ``apply_test_split``
    has been called on it.
    """
    # Step 1: load everything
    load_cfg = params['loading_params']
    try:
        ML = load_all_data(load_cfg, output_loc, user_dr, n)
    except Exception as err:
        save_error('Error loading data', output_loc, err)

    # NOTE(review): ML is unbound here if save_error returns after a
    # loading failure -- confirm that save_error terminates the process.
    ML._print('Loaded All Data.')

    # Step 2: apply the test split to the loaded object
    try:
        apply_test_split(params['test_params'], ML, output_loc)
    except Exception as err:
        save_error('Error applying test split', output_loc, err)

    ML._print('Performed test split.')

    return ML
Esempio n. 9
0
 def wrapper(*args, **kwargs) -> Any:
     """Call ``func`` up to ``max_tries`` times, retrying on network errors.

     Returns the first successful result of ``func(*args, **kwargs)``.
     After every failed attempt the error text is logged (unless
     ``silent``) and the wrapper sleeps for the configured delay. When all
     attempts fail, the last message is optionally passed to
     ``save_error`` and, if ``quit`` is set, the process exits with
     status 1; otherwise ``None`` is returned implicitly.
     """
     delay = get_global('SLEEP_TIME') * sleep_multiplier
     message: List[Union[str, Tuple]] = []

     for _attempt in range(max_tries):
         try:
             return func(*args, **kwargs)
         except (MyProxyError, ProxyError, ConnectionError, OSError,
                 Timeout) as err:
             message = [str(err)]

             # The retry hint is only added when the message gets logged
             if delay and not silent:
                 message.append(f'next try in {delay} seconds')
             if not silent:
                 log(*message, sep='\n')

             if delay:
                 sleep(delay)

     # Every attempt failed (or max_tries allowed no attempt at all)
     if save and message:
         save_error(*message, sep='\n')
     if quit:
         exit(1)
Esempio n. 10
0
def main(user_dr, job_name):
    """Build the summary HTML table for a finished job and save it.

    Reads ``ML_Params_<job_name>.json`` from the user's ``temp`` directory
    to find the project id, waits for ``results.pkl`` to appear in the job
    directory (up to 10 attempts, one second apart), converts the results
    with ``get_table_html`` and writes ``{'table_html': ...}`` to
    ``ML_Output_<job_name>.json`` via ``save_results``.

    Parameters
    ----------
    user_dr
        User directory containing ``temp`` and ``Jobs``.
    job_name
        Name of the job; used both in file names and as the job directory.
    """
    temp_dr = os.path.join(user_dr, 'temp')
    params_loc = os.path.join(temp_dr, 'ML_Params_' + str(job_name) + '.json')
    output_loc = os.path.join(temp_dr, 'ML_Output_' + str(job_name) + '.json')

    with open(params_loc, 'r') as f:
        params = json.load(f)['params']

    project_dr = os.path.join(user_dr, 'Jobs', params['project_id'])
    job_dr = os.path.join(project_dr, job_name)
    results_loc = os.path.join(job_dr, 'results.pkl')

    # The results file may not have been written yet, so poll for it.
    # Only a missing file triggers a retry; other errors propagate.
    max_tries = 10
    results = None
    for _ in range(max_tries):
        try:
            with open(results_loc, 'rb') as f:
                results = pkl.load(f)
            break
        except FileNotFoundError:
            time.sleep(1)
    else:
        # Loop finished without a successful read
        save_error('Error reading saved results', output_loc)

    try:
        table_html = get_table_html(results)
    except Exception as e:
        # Keep table_html defined so the save below cannot raise NameError
        # if save_error hands control back.
        table_html = None
        save_error('Error generating summary table', output_loc, e)

    output = {'table_html': table_html}

    save_results(output_loc, output)
Esempio n. 11
0
def get_show(params, ML, save_loc, output_loc):
    """Create the distribution figure / table for the requested variable.

    The kind of plot depends on ``params['show_params']['source']``:

    - ``'Data Variable'`` / ``'Set Variable'``: ``ML.Show_Covars_Dist``
    - ``'Target'``: ``ML.Show_Targets_Dist``
    - ``'Non-Input Variable'``: ``ML.Show_Strat_Dist`` (with
      ``ML.strat_u_name`` stripped from the name)
    - ``'Set'``: an animation from ``ML.Show_Data_Dist`` saved as an
      ``.mp4`` with the ffmpeg writer

    Parameters
    ----------
    params
        Mapping with a ``'show_params'`` entry holding ``'source'`` and
        ``'name'``.
    ML
        Object providing the ``Show_*`` methods, ``dpi`` and ``_print``.
    save_loc
        Path the figure is saved to; for ``'Set'`` every ``'.png'`` in it
        is replaced by ``'.mp4'``.
    output_loc
        Where results / errors are written.

    Returns
    -------
    ``(save_loc, table)`` for the figure-based sources. For ``'Set'`` the
    results are written directly with ``save_results`` and ``None`` is
    returned.
    """
    show_params = params['show_params']

    if show_params['source'] in ['Data Variable', 'Set Variable']:

        try:

            display_df = ML.Show_Covars_Dist(covars=show_params['name'],
                                             subjects='both',
                                             show=False,
                                             cat_type='Frequency',
                                             return_display_dfs=True)

        except Exception as e:
            save_error('Error creating variable dist', output_loc, e)

    elif show_params['source'] == 'Target':

        try:

            display_df = ML.Show_Targets_Dist(targets=show_params['name'],
                                              subjects='both',
                                              show=False,
                                              cat_type='Frequency',
                                              return_display_dfs=True)

        except Exception as e:
            save_error('Error creating target dist', output_loc, e)

    elif show_params['source'] == 'Non-Input Variable':

        try:
            to_dist = show_params['name'].replace(ML.strat_u_name, '')
            display_df = ML.Show_Strat_Dist(strat=to_dist,
                                            subjects='both',
                                            show=False,
                                            cat_type='Frequency',
                                            return_display_dfs=True)

        except Exception as e:
            save_error('Error creating non-input dist', output_loc, e)

    elif show_params['source'] == 'Set':

        try:

            ML._print(show_params['name'], type(show_params['name']))

            display_df = None

            # The notebook flag is only switched on for this one call, so
            # restore it even when the call raises.
            ML.notebook = True
            try:
                anim = ML.Show_Data_Dist(data_subset=show_params['name'],
                                         subjects='both',
                                         return_anim=True)
            finally:
                ML.notebook = False

            import matplotlib.animation as animation
            Writer = animation.writers['ffmpeg']
            writer = Writer(fps=2)

            # The animation is written as a video instead of an image
            save_loc = save_loc.replace('.png', '.mp4')
            anim.save(save_loc,
                      dpi=ML.dpi,
                      writer=writer)

            output = {}
            output['html_output'] = ''
            output['html_table'] = ''
            output['img_loc'] = save_loc
            output['status'] = 1
            save_results(output_loc, output)
            return

        except Exception as e:
            save_error('Error creating distribution video', output_loc, e)

    # Figure-based sources: save the current figure and build the table
    plt.savefig(save_loc, dpi=ML.dpi, bbox_inches='tight')
    table = df_to_table(display_df)

    return save_loc, table