def test_json_all(arg):
    """Run ``ml_models --do fit`` for every top-level key of every JSON config found.

    JSON files are collected under ``<package root>/dataset/json/`` with
    ``get_recursive_files2`` using the patterns ``/*/.json`` and ``/*/*.json``.
    Each top-level key of a JSON file is used as the ``--config_mode`` value.

    :param arg: forwarded to ``log_info_repo``
    :return: None
    """
    log_info_repo(arg)

    root = os_package_root_path()
    root = root.replace("\\", "//")
    log(root)

    path = str(os.path.join(root, "dataset/json/"))
    log(path)

    log("############ List of files ################################")
    model_list = get_recursive_files2(path, r'/*/.json')
    model_list2 = get_recursive_files2(path, r'/*/*.json')
    model_list = model_list + model_list2
    print("List of JSON Files", model_list)

    for js_file in model_list:
        log("\n\n\n", "************", "JSON File", js_file)

        # Close the config file as soon as it is parsed instead of leaking
        # one open handle per JSON file.
        with open(js_file, mode='r') as fp:
            cfg = json.load(fp)

        # Only the keys (config modes) are needed; the values are read by the CLI itself.
        for kmode, _ in cfg.items():
            cmd = f"ml_models --do fit --config_file {js_file} --config_mode {kmode} "
            log_separator()
            log(cmd)
            os.system(cmd)
def test_import(arg=None):
    """Try to import every ``.py`` file listed by ``os_get_file`` as a ``dsa2.*`` module.

    Import failures are logged and do not stop the loop (best effort).
    No file is excluded: an empty block list is passed to ``os_get_file``.

    :param arg: forwarded to ``log_info_repo``
    :return: None
    """
    from importlib import import_module

    log_info_repo(arg)
    log_separator()
    log("test_import")

    file_list = os_get_file(folder=None, block_list=[], pattern=r"/*.py")
    print(file_list)

    for f in file_list:
        module_name = f   # label used in the error log if the name cannot even be built
        try:
            # Strip only the trailing ".py". A blanket replace(".py", "") also eats
            # ".py" in the middle of the dotted name (e.g. "pkg\\pytest.py" -> "pkgtest").
            stem = f[:-3] if f.endswith(".py") else f
            module_name = "dsa2." + stem.replace("\\", ".").replace("/", ".")
            import_module(module_name)
            print(module_name)
        except Exception as e:
            log("Error", module_name, e)
def register(run_name, params, metrics, signature, model_class, tracking_uri="sqlite:///local.db"):
    """Log a pickled model with its params and metrics to MLflow and register it.

    :param run_name: name of the MLflow run; also used as the artifact path and
                     inside the registered model name
    :param params: dict logged with ``mlflow.log_params``; ``params['path_train_model']``
                   is the folder that contains ``model.pkl``
    :param metrics: object whose ``apply(func, axis=1)`` passes rows exposing
                    ``metric_name`` and ``metric_val``
                    (presumably a pandas DataFrame -- confirm with caller)
    :param signature: forwarded as ``signature`` to ``mlflow.sklearn.log_model``
    :param model_class: string appended to the registered model name
    :param tracking_uri: URI given to ``mlflow.set_tracking_uri``
    :return: None
    """
    def _log_one_metric(row):
        return mlflow.log_metric(row.metric_name, row.metric_val)

    mlflow.set_tracking_uri(tracking_uri)

    with mlflow.start_run(run_name=run_name) as active_run:
        run_id = active_run.info.run_uuid
        experiment_id = active_run.info.experiment_id

        sk_model = load(params['path_train_model'] + "/model.pkl")
        mlflow.log_params(params)

        # One log_metric call per row of ``metrics``.
        metrics.apply(_log_one_metric, axis=1)

        registered_name = "sklearn_" + run_name + "_" + model_class
        mlflow.sklearn.log_model(sk_model, run_name, signature=signature,
                                 registered_model_name=registered_name)

        log("MLFLOW identifiers", run_id, experiment_id)

    mlflow.end_run()
def log_remote_start(arg=None):
    """Clone the remote logs repository into ``/home/runner/work/dsa2/`` via the shell.

    The command line is logged, then executed with ``os.system``.

    :param arg: not used
    :return: None
    """
    # NOTE(review): the git remote below looks like it went through an e-mail
    # obfuscation filter -- confirm the intended remote URL.
    script = """ cd /home/runner/work/dsa2/ && git clone [email protected]:arita37/logs.git && ls && pwd """

    # Every line break of the script becomes a " ; " separator.
    shell_cmd = script.replace("\n", " ; ")
    log(shell_cmd)
    os.system(shell_cmd)
def test_list(mlist):
    """Run ``python <dsa2 package path>/<entry>`` for every entry of ``mlist``.

    :param mlist: iterable of script paths relative to the ``dsa2`` package folder
    :return: None
    """
    pkg_dir = dsa2.__path__[0]

    # All command lines are built first, then executed one after the other.
    commands = [f"python {pkg_dir}/{script}" for script in mlist]

    for command in commands:
        log_separator()
        log(command)
        os.system(command)
def main():
    """CLI entry point: run a list of scripts, or dispatch to a function of this module.

    If ``arg.do`` contains ``.py`` it is treated as a comma-separated list of scripts
    and handed to ``test_list``. Otherwise ``arg.do`` is looked up in ``globals()``
    and the resulting callable is invoked with ``arg``.
    """
    arg = cli_load_arguments()
    log(arg.do, arg.config_file, arg)

    action = arg.do

    # Anything without ".py" is the name of a module-level function.
    if ".py" not in action:
        log("ml_test --do " + action)
        globals()[action](arg)
        return

    # Otherwise: comma-separated list of script names.
    test_list(action.split(","))
def objective_fun(mdict):
    """Train with ``mdict`` and return ``metric_val`` of the first test-metric record.

    Relies on ``debug``, ``n_sample``, ``metric_name`` and ``run_train`` from the
    enclosing scope. The value is also stored under ``ddict['stats'][metric_name]``.

    :param mdict: model dictionary passed as ``model_dict`` to ``run_train``
    :return: ``metric_val`` of the first record of ``stats['metrics_test']``
    """
    if debug:
        log(mdict)

    ddict = run_train(config_name="", config_path="", n_sample=n_sample,
                      mode="run_preprocess", model_dict=mdict, return_mode='dict')

    first_record = ddict['stats']['metrics_test'].to_dict('records')[0]
    ddict['stats'][metric_name] = first_record['metric_val']

    if debug:
        print(ddict)

    return ddict['stats'][metric_name]
def objective_fun(mdict):
    """Train with ``mdict`` and return the negated mean of the selected test metric.

    Relies on ``debug``, ``n_sample``, ``metric_name``, ``run_train`` and ``np`` from
    the enclosing scope.

    :param mdict: model dictionary passed as ``model_dict`` to ``run_train``
    :return: ``-mean(metric_val)`` over the rows of ``stats['metrics_test']`` whose
             ``metric_name`` equals the enclosing ``metric_name``
    """
    if debug:
        log(mdict)

    ddict = run_train(config_name="", config_path="", n_sample=n_sample,
                      mode="run_preprocess", model_dict=mdict, return_mode='dict')

    metrics_df = ddict['stats']['metrics_test']
    selected = metrics_df[metrics_df['metric_name'] == metric_name]

    #### Beware of the sign : the mean is negated before being returned
    return -np.mean(selected['metric_val'].values)
def get_global_pars(config_uri=""):
    """Load the object designated by ``config_uri`` and return the model dictionary.

    The object is resolved with ``source.util_feature.load_function_uri``. When it is
    callable it is called without arguments and the result is returned. When it is not
    callable, or when the call raises, the loaded object itself is returned.

    :param config_uri: passed as ``uri_name`` to ``load_function_uri``
    :return: result of calling the loaded object, or the loaded object as a fallback
    """
    log("#### Model Params Dynamic loading ##########################################")
    from source.util_feature import load_function_uri

    print("config_uri", config_uri)
    model_dict_fun = load_function_uri(uri_name=config_uri)

    #### Already a plain object (e.g. a dict): nothing to call
    if not callable(model_dict_fun):
        return model_dict_fun

    try:
        return model_dict_fun()   ### params
    except Exception as e:
        # Keep the historical fallback, but no longer swallow KeyboardInterrupt /
        # SystemExit, and leave a trace of why the call failed.
        log("get_global_pars: calling the loaded object failed, returning it as-is", e)
        return model_dict_fun
def train(config='', nsample=None):
    """Train the model described by ``config`` through ``source.run_train.run_train``.

    :param config: config identifier resolved by ``get_config_path``
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    from source import run_train

    config_path = global_pars['config_path']
    if nsample is not None:
        sample_count = nsample
    else:
        sample_count = global_pars['n_sample']

    run_train.run_train(config_name=config_name,
                        config_path=config_path,
                        n_sample=sample_count)
def test_all(arg=None):
    """Run every model module that is not blocked, pushing logs after each run.

    The model list comes from ``model_get_list``. Blocked models are read from
    ``['test_all']['model_blocked']`` of the JSON file ``arg.config_file``.

    :param arg: CLI arguments; ``arg.config_file`` is read, so ``arg`` must not be
                None despite the default
    :return: None
    """
    log_info_repo(arg)
    from time import sleep

    path = dsa2.__path__[0]

    log("############Check model ################################")
    model_list = model_get_list(folder=None, block_list=[])
    log(model_list)

    ## Block list
    # Read the config through a context manager so the handle is closed right away.
    with open(path_norm(arg.config_file), mode='r') as fp:
        cfg = json.load(fp)['test_all']
    block_list = cfg['model_blocked']
    model_list = [t for t in model_list if t not in block_list]
    log("Used", model_list)

    path = path.replace("\\", "//")
    test_list = [f"python {path}/" + t.replace(".", "//") + ".py" for t in model_list]

    for cmd in test_list:
        log_separator()
        log(cmd)
        os.system(cmd)
        # log_remote_push builds its commit tag from ``arg.name``; calling it
        # without ``arg`` would dereference None.
        log_remote_push(arg)
        sleep(5)
def data_profile2(config=''):
    """Run ``run_profile`` on the training data of ``config``.

    Output goes to ``<global_pars['path_model']>/profile/`` with a fixed sample of 5000.

    :param config: config identifier resolved by ``get_config_path``
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    from source.run_feature_profile import run_profile

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    data_path = global_pars['path_data_train']
    output_path = global_pars['path_model'] + "/profile/"

    run_profile(path_data=data_path,
                path_output=output_path,
                n_sample=5000)
def train_sampler(config='', nsample=None):
    """Train through ``source.run_sampler.run_train`` using ``config``.

    :param config: config identifier resolved by ``get_config_path``
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    from source import run_sampler

    config_path = global_pars['config_path']
    if nsample is not None:
        sample_count = nsample
    else:
        sample_count = global_pars['n_sample']

    run_sampler.run_train(config_name=config_name,
                          config_path=config_path,
                          n_sample=sample_count)
def log_remote_push(arg=None):
    """Commit everything in ``dsa2_store`` and force-push it, via one shell command line.

    The commit message is ``"m_" + str(arg.name)``. When ``arg`` is None, or has no
    ``name`` attribute, the tag is ``"m_None"`` instead of raising.

    :param arg: optional CLI arguments; only ``arg.name`` is read
    :return: None
    """
    ### Pushing to dsa2_store with --force
    # getattr keeps the documented ``arg=None`` default usable.
    tag = "m_" + str(getattr(arg, "name", None))

    # One shell command per entry. An explicit list makes the " ; " separators
    # independent of line breaks inside a string literal.
    commands = [
        "cd /home/runner/work/dsa2/dsa2_store/",
        "pip3 freeze > deps.txt",
        "ls",
        'git config --local user.email "*****@*****.**" && git config --local user.name "arita37"',
        f'git add --all && git commit -m "{tag}"',
        "git pull --all",
        "git push --all -f",
        "cd /home/runner/work/dsa2/dsa2/",
    ]

    cmd = " ; ".join(commands)
    log(cmd)
    os.system(cmd)
def preprocess(config='', nsample=None):
    """Run ``source.run_preprocess.run_preprocess`` in ``'run_preprocess'`` mode.

    :param config: config identifier resolved by ``get_config_path``
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    from source import run_preprocess

    config_path = global_pars['config_path']
    if nsample is not None:
        sample_count = nsample
    else:
        sample_count = global_pars['n_sample']

    run_preprocess.run_preprocess(config_name=config_name,
                                  config_path=config_path,
                                  n_sample=sample_count,
                                  ### Optional
                                  mode='run_preprocess')
def predict(config='', nsample=None):
    """Run ``source.run_inference.run_predict`` for ``config``.

    Input and output locations come from ``global_pars['path_pred_data']`` and
    ``global_pars['path_pred_output']``.

    :param config: config identifier resolved by ``get_config_path``
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    from source import run_inference

    config_path = global_pars['config_path']
    if nsample is not None:
        sample_count = nsample
    else:
        sample_count = global_pars['n_sample']

    run_inference.run_predict(config_name=config_name,
                              config_path=config_path,
                              n_sample=sample_count,
                              #### Optional
                              path_data=global_pars['path_pred_data'],
                              path_output=global_pars['path_pred_output'],
                              model_dict=None)
def test_dataloader(arg=None):
    """Run the ``--do test`` entry points of ``dataloader.py`` and ``preprocess/generic.py``.

    :param arg: CLI arguments; ``arg.config_file`` is loaded with ``json_load``
                (the loaded content is not used afterwards)
    :return: None
    """
    log_info_repo(arg)

    pkg_dir = dsa2.__path__[0]

    # Loaded only for its side effect (fails early on an unreadable config).
    cfg = json_load(path_norm(arg.config_file))

    log("############Check model ################################")
    pkg_dir = pkg_dir.replace("\\", "//")

    commands = (
        f"python {pkg_dir}/dataloader.py --do test ",
        f"python {pkg_dir}/preprocess/generic.py --do test ",
    )

    for command in commands:
        log_separator()
        log(command)
        os.system(command)
def test_benchmark(arg=None):
    """Run ``benchmark.py`` once per benchmark name through ``os.system``.

    :param arg: forwarded to ``log_info_repo``
    :return: None
    """
    log_info_repo(arg)

    pkg_dir = dsa2.__path__[0]
    log("############Check model ################################")
    pkg_dir = pkg_dir.replace("\\", "//")

    commands = (
        f"python {pkg_dir}/benchmark.py --do timeseries ",
        f"python {pkg_dir}/benchmark.py --do vision_mnist ",
        f"python {pkg_dir}/benchmark.py --do fashion_vision_mnist ",
        f"python {pkg_dir}/benchmark.py --do text_classification ",
        f"python {pkg_dir}/benchmark.py --do nlp_reuters ",
    )

    for command in commands:
        log_separator()
        print(command)
        os.system(command)
def transform(config='', nsample=None):
    """Run ``source.run_sampler.run_transform`` for ``config``.

    Input and output locations come from ``global_pars['path_pred_data']`` and
    ``global_pars['path_pred_output']``.

    :param config: config identifier resolved by ``get_config_path``
    :param nsample: number of samples; when None, ``global_pars['n_sample']`` is used
    :return: None
    """
    config_uri, config_name = get_config_path(config)

    settings = get_global_pars(config_uri)
    global_pars = settings['global_pars']
    log(settings)

    from source import run_sampler

    config_path = global_pars['config_path']
    if nsample is not None:
        sample_count = nsample
    else:
        sample_count = global_pars['n_sample']

    run_sampler.run_transform(config_name=config_name,
                              config_path=config_path,
                              n_sample=sample_count,
                              #### Optional
                              path_data=global_pars['path_pred_data'],
                              path_output=global_pars['path_pred_output'],
                              model_dict=None)
def test_model_structure():
    """Log the working directory and the ``dsa2`` module, then run ``ztest_structure.py``.

    :return: None
    """
    log("os.getcwd", os.getcwd())
    log(dsa2)

    # NOTE(review): the package path is read but not part of the command below --
    # confirm whether the script was meant to be addressed relative to it.
    pkg_dir = dsa2.__path__[0]

    log("############Check structure ############################")
    command = "ztest_structure.py"
    os.system(command)
def test_cli(arg=None):
    """Execute the CLI commands listed in ``README_usage_CLI.md``.

    Every line of the file that is longer than 15 characters once stripped and starts
    with ``ml_models ``, ``ml_benchmark `` or ``ml_optim `` is run with ``os.system``,
    after appending the suffix returned by ``to_logfile``.

    :param arg: forwarded to ``log_info_repo``
    :return: None
    """
    log("# Testing Command Line System ")
    log_info_repo(arg)

    import dsa2
    import os

    pkg_dir = dsa2.__path__[0]   ### Root Path
    fileconfig = path_norm(f"{pkg_dir}/../README_usage_CLI.md")
    print("Using :", fileconfig)

    cli_prefixes = ("ml_models ", "ml_benchmark ", "ml_optim ")

    def is_valid_cmd(cmd):
        # Long enough and starting with one of the known CLI entry points.
        candidate = cmd.strip()
        return len(candidate) > 15 and candidate.startswith(cli_prefixes)

    with open(fileconfig, mode="r") as f:
        cmd_list = f.readlines()
    print(cmd_list[:3])

    #### Parse the CMD from the file .md and Execute
    for raw_line in cmd_list:
        command = raw_line.strip()
        if not is_valid_cmd(command):
            continue
        command = command + to_logfile("cli", '+%Y-%m-%d_%H')
        log_separator()
        print(command, flush=True)
        os.system(command)
def hyperparam_wrapper(config_full="", ntrials=2, n_sample=5000, debug=1,
                       path_output="data/output/titanic1/",
                       path_optuna_storage='data/output/optuna_hyper/optunadb.db',
                       metric_name='accuracy_score', mdict_range=None):
    """Run an Optuna hyper-parameter search for one config and export the best params.

    :param config_full: URI given to ``load_function_uri``; the part after the last
                        ``::`` is used as the study name
    :param ntrials: number of trials passed to ``run_hyper_optuna``
    :param n_sample: ``n_sample`` passed to ``run_train`` for every trial
    :param debug: when truthy, the model dict of each trial is logged
    :param path_output: folder receiving ``hyper_params_best.json``
    :param path_optuna_storage: currently not used; the storage is in-memory SQLite
    :param metric_name: value matched against the ``metric_name`` column of the test metrics
    :param mdict_range: search ranges, forwarded to ``run_hyper_optuna``
    :return: None
    """
    from source.util_feature import load_function_uri
    from source.run_train import run_train
    from source.run_hyperopt import run_hyper_optuna
    import json

    ##############################################################################
    ####### model_dict  initial dict of params  ##################################
    config_name = config_full.split("::")[-1]
    mdict = load_function_uri(config_full)
    mdict = mdict()

    ####### Objective   ##########################################################
    def objective_fun(mdict):
        if debug:
            log(mdict)
        ddict = run_train(config_name="", config_path="", n_sample=n_sample,
                          mode="run_preprocess", model_dict=mdict, return_mode='dict')

        df = ddict['stats']['metrics_test']
        #### Beware of the sign : the mean of the selected metric is negated
        res = -np.mean(df[df['metric_name'] == metric_name]['metric_val'].values)
        return res

    ##### Optuna Params   ####################################################
    engine_pars = {'metric_target': 'loss',
                   'study_name': config_name,
                   'storage': "sqlite:///:memory:"}

    ##### Running the optim
    best_dict = run_hyper_optuna(objective_fun, mdict, mdict_range, engine_pars, ntrials=ntrials)

    ##### Export
    os.makedirs(path_output, exist_ok=True)
    # Context manager: the file is flushed and closed before the function returns.
    # NOTE(review): append mode is kept from the original; repeated runs concatenate
    # several JSON documents in one file -- confirm whether overwrite was intended.
    with open(path_output + "/hyper_params_best.json", mode='a') as fp:
        json.dump(best_dict, fp)

    log(engine_pars['storage'])
    log(best_dict)
    log(path_output)
def test_json(arg):
    """Run ``python <dsa2 package path>/<model>`` for every entry of ``model_list``.

    ``model_list`` is read from the JSON file ``arg.config_file``.

    :param arg: CLI arguments; ``arg.config_file`` is the path of the JSON config
    :return: None
    """
    log_info_repo(arg)
    log("os.getcwd", os.getcwd())

    path = dsa2.__path__[0]

    # Context manager so the config handle is closed once parsed.
    with open(arg.config_file, mode='r') as fp:
        cfg = json.load(fp)

    mlist = cfg['model_list']
    log(mlist)

    test_list = [f"python {path}/{model}" for model in mlist]

    for cmd in test_list:
        log_separator()
        log(cmd)
        os.system(cmd)
def check(config='titanic_classifier.py::titanic_lightgbm'):
    """Load the model dictionary for ``config`` and log it.

    :param config: URI handed to ``get_global_pars``
    :return: None
    """
    loaded = get_global_pars(config)

    # Accessed only to fail early when the 'global_pars' section is missing.
    _ = loaded['global_pars']

    log(loaded)
def test_functions(arg=None):
    """Call every function described in ``dataset/test_json/test_functions.json``.

    Each entry of the ``test`` list provides ``uri`` (resolved with
    ``load_function_uri``) and optionally ``args`` (positional) and ``kw_args``
    (keyword). The result of each call is logged. Failures are logged and the loop
    continues.

    :param arg: not used
    :return: None
    """
    from dsa2.util import load_function_uri

    path = path_norm("dataset/test_json/test_functions.json")
    # Context manager so the JSON handle does not stay open.
    with open(path) as fp:
        dd = json.load(fp)['test']

    for p in dd:
        try:
            log("\n\n", "#" * 20, p)

            myfun = load_function_uri(p['uri'])
            log(myfun)

            w = p.get('args', [])
            kw = p.get('kw_args', {})

            # Unpacking empty args / kwargs is a no-op, so one call covers all four
            # combinations of present and absent positional and keyword arguments.
            log(myfun(*w, **kw))

        except Exception as e:
            log(e, p)
def check(config='outlier_predict.py::titanic_lightgbm'):
    """Load the model dictionary for ``config`` and log it.

    :param config: URI handed to ``get_global_pars``
    :return: None
    """
    loaded = get_global_pars(config)

    # Accessed only to fail early when the 'global_pars' section is missing.
    _ = loaded['global_pars']

    log(loaded)
def test_pullrequest(arg=None):
    """Run every ``.py`` file found under ``pullrequest/`` and fail on logged errors.

    ``pullrequest/`` is resolved next to the ``dsa2`` package folder. After
    ``test_import`` and the ``ml_optim`` / ``ml_dsa2`` commands, each script is run
    with ``python``. ``log_.txt`` is then read and an exception is raised for the
    first line containing ``"Error"``.

    :param arg: forwarded to ``log_info_repo``
    :return: None
    :raises Exception: when a line of ``log_.txt`` contains ``"Error"``
    """
    log_info_repo(arg)
    from pathlib import Path

    pr_dir = str(os.path.join(Path(dsa2.__path__[0]).parent, "pullrequest/"))
    log(pr_dir)

    log("############Check model ################################")
    file_list = get_recursive_files(pr_dir, r"*.py")
    log(file_list)

    ## Block list (currently empty: every file is kept)
    block_list = []
    scripts = [script for script in file_list if script not in block_list]
    log("Used", scripts)

    log("########### Run Check ##############################")
    test_import(arg=None)
    sleep(20)
    os.system("ml_optim")
    os.system("ml_dsa2")

    for script in scripts:
        target = script + to_logfile(prefix="", dateformat='')
        command = f"python {target}"
        log_separator()
        log(command)
        os.system(command)
        sleep(5)

    #### Check the logs   ###################################
    with open("log_.txt", mode="r") as f:
        log_lines = f.readlines()

    first_error = next((line for line in log_lines if "Error" in line), None)
    if first_error is not None:
        raise Exception("Unknown dataset type", first_error)