def download_all_logs(ip, port):
    """Exercise every supported argument combination of
    h2o.download_all_logs(), asserting each produced log archive exists
    on disk and removing it afterwards.

    :param ip: unused; kept for the pyunit test-runner interface.
    :param port: unused; kept for the pyunit test-runner interface.
    """
    # The original repeated the same check four times; the cases are
    # (positional args, keyword args) passed through to download_all_logs.
    cases = [
        ((), {}),                             # default
        ((".", "h2o_logs.txt"), {}),          # dirname and filename
        ((), {"dirname": "."}),               # dirname
        ((), {"filename": "h2o_logs.txt"}),   # filename
    ]
    for args, kwargs in cases:
        log_location = h2o.download_all_logs(*args, **kwargs)
        assert os.path.exists(
            log_location
        ), "Expected h2o logs to be saved in {0}, but they weren't".format(
            log_location)
        os.remove(log_location)
def download_logs():
    """Download H2O logs twice -- once to the default location and once
    to an explicit directory/filename -- and assert both files exist."""
    results_dir = pyunit_utils.locate("results")
    # Default destination: h2o chooses the path.
    default_path = h2o.download_all_logs()
    assert os.path.exists(default_path)
    # Explicit destination: the caller controls directory and filename.
    explicit_path = h2o.download_all_logs(dirname=results_dir,
                                          filename="logs.zip")
    expected_path = os.path.join(results_dir, "logs.zip")
    assert explicit_path == expected_path
    assert os.path.exists(explicit_path)
def save_artifacts(automl, dataset, config):
    """Persist benchmark artifacts for an AutoML run.

    Which artifacts are saved is driven by the '_save_artifacts' framework
    parameter (defaults to just the leaderboard): 'leaderboard', 'models'
    (optionally as MOJOs via 'mojos'), 'models_predictions', 'logs'.
    Any failure is logged and swallowed -- artifact saving is best-effort.
    """
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = output_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = output_subdir("models", config)
            # First "all models" stacked ensemble on the leaderboard, if any.
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            # In mojo mode only the ensemble is exported (it embeds the base
            # models); otherwise every model on the leaderboard is saved.
            if all_models_se and mformat == 'mojo':
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)
            models_archive = os.path.join(models_dir, "models.zip")
            zip_path(models_dir, models_archive)

            # Remove the loose .json/.zip files that were just archived,
            # keeping only the archive itself.
            def delete(path, isdir):
                if path != models_archive and os.path.splitext(
                        path)[1] in ['.json', '.zip']:
                    os.remove(path)

            walk_apply(models_dir, delete, max_depth=0)
        if 'models_predictions' in artifacts:
            predictions_dir = output_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                save_predictions(model, test,
                                 dataset=dataset,
                                 config=config,
                                 predictions_file=os.path.join(
                                     predictions_dir, mid, 'predictions.csv'),
                                 preview=False)
            zip_path(predictions_dir,
                     os.path.join(predictions_dir, "models_predictions.zip"))

            # Remove the per-model prediction directories now archived above.
            def delete(path, isdir):
                if isdir:
                    shutil.rmtree(path, ignore_errors=True)

            walk_apply(predictions_dir, delete, max_depth=0)
        if 'logs' in artifacts:
            logs_dir = output_subdir("logs", config)
            h2o.download_all_logs(dirname=logs_dir)
    except Exception:
        # Best-effort: never let artifact saving break the benchmark run.
        log.debug("Error when saving artifacts.", exc_info=True)
def hadoop_download_logs():
    """Download all H2O logs, extract the archive and verify that every
    expected per-node INFO log file exists.

    Raises EnvironmentError when the Hadoop namenode is not accessible.
    """
    # Check if we are running inside the H2O network by seeing if we can
    # touch the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible()
    if hadoop_namenode_is_accessible:
        # NOTE(review): the located path is unused; locate() presumably
        # raises when "results" is missing, so this acts as a precondition
        # check -- confirm before removing.
        results_dir = pyunit_utils.locate("results")
        zip_file = h2o.download_all_logs()
        extracted_dir = os.path.abspath("extracted")
        print("Logs extracted into: " + extracted_dir)
        # Start from a clean extraction directory.
        if os.path.isdir(extracted_dir):
            shutil.rmtree(extracted_dir)
        # BUG FIX: use a context manager so the archive handle is closed
        # even if extractall() raises (the original leaked it on error).
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extracted_dir)
        # The archive contains a single top-level directory of node logs.
        nodes_log_dir = os.path.join(extracted_dir,
                                     os.listdir(extracted_dir)[0])
        nodes_log_file_names = os.listdir(nodes_log_dir)
        for f in expected_log_files(nodes_log_dir, nodes_log_file_names,
                                    "INFO"):
            print("Checking if file " + f + " exists")
            # check that all expected files exist (drop the '== True'
            # anti-idiom; assert on the truthy value directly)
            assert os.path.isfile(f)
    else:
        raise EnvironmentError
def h2odownload_all_logs():
    """
    Python API test: h2o.download_all_logs(dirname=u'.', filename=None)

    Trains a small GLM so the cluster has activity to log, then verifies
    download_all_logs() returns the full path of the saved file.
    """
    training_data = h2o.import_file(
        pyunit_utils.locate("smalldata/logreg/benign.csv"))
    Y = 3
    X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
    model = H2OGeneralizedLinearEstimator(family="binomial",
                                          alpha=0,
                                          Lambda=1e-5)
    model.train(x=X, y=Y, training_frame=training_data)
    try:
        results_dir = pyunit_utils.locate(
            "results")  # find directory path to results folder
        filename = "logs.csv"
        dir_path = h2o.download_all_logs(results_dir, filename)  # save logs csv
        full_path_filename = os.path.join(results_dir, filename)
        assert dir_path == full_path_filename, "h2o.download_all_logs() command is not working."
        assert os.path.isfile(
            full_path_filename
        ), "h2o.download_all_logs() command is not working."
    except Exception as e:
        # BUG FIX: e.args[0] raises IndexError for argless exceptions and
        # TypeError when args[0] is not a string; str(e) is always safe.
        if 'File not found' in str(e):
            print(
                "Directory is not writable. h2o.download_all_logs() command is not tested."
            )
        else:
            assert False, "h2o.download_all_logs() command is not working."
def get_all_variables_csv(i):
    """Read a two-row CSV (row 0: column names, row 1: dtype strings) and
    return a dict mapping stripped column name -> stripped dtype string.

    :param i: path or file-like object accepted by pandas.read_csv.
    :returns: dict of {column_name: dtype_string}.

    On a read failure the H2O logs are downloaded, the cluster is shut
    down and the process exits with status 10.
    """
    try:
        iv = pd.read_csv(i, header=None)
    # BUG FIX: narrowed from a bare 'except:' so SystemExit and
    # KeyboardInterrupt are not swallowed.
    except Exception:
        logging.critical('read csv error')
        h2o.download_all_logs(dirname=logs_path, filename=logfile)
        h2o.cluster().shutdown()
        sys.exit(10)
    rows = iv.values.tolist()
    cols, dtypes = rows[0], rows[1]
    # zip pairs each column with its dtype, replacing the manual index
    # loop that also shadowed the parameter 'i'.
    return {c.strip(): d.strip() for c, d in zip(cols, dtypes)}
def hadoop_download_logs():
    """Download all H2O logs, extract the archive and verify that every
    expected per-node INFO log file exists.

    Raises EnvironmentError when the Hadoop namenode is not accessible.
    """
    # Check if we are running inside the H2O network by seeing if we can
    # touch the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible(
    )
    if hadoop_namenode_is_accessible:
        # NOTE(review): the located path is unused; locate() presumably
        # raises when "results" is missing, so this acts as a precondition
        # check -- confirm before removing.
        results_dir = pyunit_utils.locate("results")
        zip_file = h2o.download_all_logs()
        extracted_dir = os.path.abspath("extracted")
        print("Logs extracted into: " + extracted_dir)
        # Start from a clean extraction directory.
        if os.path.isdir(extracted_dir):
            shutil.rmtree(extracted_dir)
        # BUG FIX: context manager closes the archive even when
        # extractall() raises (the original leaked the handle on error).
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(extracted_dir)
        # The archive contains a single top-level directory of node logs.
        nodes_log_dir = os.path.join(extracted_dir,
                                     os.listdir(extracted_dir)[0])
        nodes_log_file_names = os.listdir(nodes_log_dir)
        for f in expected_log_files(nodes_log_dir, nodes_log_file_names,
                                    "INFO"):
            print("Checking if file " + f + " exists")
            # check that all expected files exist (drop '== True')
            assert os.path.isfile(f)
    else:
        raise EnvironmentError
def test_download_txt_logs():
    """Verify download_all_logs() honors container="LOG" (plain text) and
    defaults to a zip archive otherwise."""
    marker = "txt-logs-marker"
    results_dir = pyunit_utils.locate(
        "results")  # find directory path to results folder
    # log something unique so that we can try to search for it in the downloaded logs
    h2o.log_and_echo(marker)
    log_path = h2o.download_all_logs(results_dir,
                                     filename="plain_text_logs.log_ignore",
                                     container="LOG")
    assert not zipfile.is_zipfile(log_path)
    # logs are trimmed (we can only check smaller files); 10 MiB cutoff.
    # BUG FIX: was 10 * 1024 * 1042 -- '1042' is a typo for 1024.
    assert find_marker(log_path, marker) or os.path.getsize(log_path) > 10 * 1024 * 1024
    # Now make sure we get a zip file if we don't specify the container format
    zip_logs_path = h2o.download_all_logs(results_dir, filename="zip_logs.zip")
    assert zipfile.is_zipfile(zip_logs_path)
def download_all_logs(ip, port):
    """Connect to a running H2O cluster and exercise every supported
    argument combination of h2o.download_all_logs(), asserting each
    produced file exists and removing it afterwards.

    :param ip: H2O host to connect to.
    :param port: H2O port to connect to.
    """
    # Connect to h2o
    h2o.init(ip, port)
    # The original repeated the same check four times; the cases are
    # (positional args, keyword args) passed through to download_all_logs.
    cases = [
        ((), {}),                             # default
        ((".", "h2o_logs.txt"), {}),          # dirname and filename
        ((), {"dirname": "."}),               # dirname
        ((), {"filename": "h2o_logs.txt"}),   # filename
    ]
    for args, kwargs in cases:
        log_location = h2o.download_all_logs(*args, **kwargs)
        assert os.path.exists(log_location), "Expected h2o logs to be saved in {0}, but they weren't".format(log_location)
        os.remove(log_location)
def save_artifacts(automl, dataset, config):
    """Persist benchmark artifacts for an AutoML run.

    Which artifacts are saved is driven by the '_save_artifacts' framework
    parameter (defaults to just the leaderboard): 'leaderboard', 'models'
    (optionally as MOJOs via 'mojos'), 'models_predictions', 'logs'.
    Any failure is logged and swallowed -- artifact saving is best-effort.
    """
    artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
    try:
        lb = automl.leaderboard.as_data_frame()
        log.debug("Leaderboard:\n%s", lb.to_string())
        if 'leaderboard' in artifacts:
            models_dir = make_subdir("models", config)
            write_csv(lb, os.path.join(models_dir, "leaderboard.csv"))
        if 'models' in artifacts:
            models_dir = make_subdir("models", config)
            # First "all models" stacked ensemble on the leaderboard, if any.
            all_models_se = next(
                (mid for mid in lb['model_id']
                 if mid.startswith("StackedEnsemble_AllModels")), None)
            mformat = 'mojo' if 'mojos' in artifacts else 'json'
            # Save only the ensemble when present; otherwise every model.
            if all_models_se:
                save_model(all_models_se, dest_dir=models_dir, mformat=mformat)
            else:
                for mid in lb['model_id']:
                    save_model(mid, dest_dir=models_dir, mformat=mformat)
        if 'models_predictions' in artifacts:
            predictions_dir = make_subdir("predictions", config)
            test = h2o.get_frame(frame_name('test', config))
            for mid in lb['model_id']:
                model = h2o.get_model(mid)
                save_predictions(model, test,
                                 dataset=dataset,
                                 config=config,
                                 predictions_file=os.path.join(
                                     predictions_dir, mid, 'predictions.csv'))
        if 'logs' in artifacts:
            logs_dir = make_subdir("logs", config)
            h2o.download_all_logs(dirname=logs_dir)
    # BUG FIX: narrowed from a bare 'except:' so SystemExit and
    # KeyboardInterrupt are not swallowed.
    except Exception:
        # Best-effort: never let artifact saving break the benchmark run.
        log.debug("Error when saving artifacts.", exc_info=True)
def hadoop_download_logs():
    """Download all H2O logs, extract the archive and verify that every
    expected per-node INFO log file exists."""
    zip_file = h2o.download_all_logs()
    extracted_dir = os.path.abspath("extracted")
    print("Logs extracted into: " + extracted_dir)
    # Start from a clean extraction directory.
    if os.path.isdir(extracted_dir):
        shutil.rmtree(extracted_dir)
    # BUG FIX: context manager closes the archive even when extractall()
    # raises (the original leaked the handle on error).
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extracted_dir)
    # The archive contains a single top-level directory of node logs.
    nodes_log_dir = os.path.join(extracted_dir, os.listdir(extracted_dir)[0])
    nodes_log_file_names = os.listdir(nodes_log_dir)
    for f in expected_log_files(nodes_log_dir, nodes_log_file_names, "INFO"):
        print("Checking if file " + f + " exists")
        # check that all expected files exist
        assert os.path.isfile(f)
def h2odownload_all_logs():
    """
    Python API test: h2o.download_all_logs(dirname=u'.', filename=None)

    Trains a small GLM so the cluster has activity to log, then verifies
    download_all_logs() returns the full path of the saved file.
    """
    training_data = h2o.import_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
    Y = 3
    X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
    model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
    model.train(x=X, y=Y, training_frame=training_data)
    try:
        results_dir = pyunit_utils.locate("results")  # find directory path to results folder
        filename = "logs.csv"
        dir_path = h2o.download_all_logs(results_dir, filename)  # save logs csv
        full_path_filename = os.path.join(results_dir, filename)
        assert dir_path == full_path_filename, "h2o.download_all_logs() command is not working."
        assert os.path.isfile(full_path_filename), "h2o.download_all_logs() command is not working."
    except Exception as e:
        # BUG FIX: e.args[0] raises IndexError for argless exceptions and
        # TypeError when args[0] is not a string; str(e) is always safe.
        if 'File not found' in str(e):
            print("Directory is not writable. h2o.download_all_logs() command is not tested.")
        else:
            assert False, "h2o.download_all_logs() command is not working."
log_file=run_id+'.log' log_file = os.path.join(run_dir,log_file) logging.basicConfig(filename=log_file,level=logging.INFO,format="%(asctime)s:%(levelname)s:%(message)s") logging.info(start) # 65535 Highest port no port_no=random.randint(5555,55555) # h2o.init(strict_version_check=False,min_mem_size_GB=min_mem_size,port=port_no) # start h2o try: h2o.init(strict_version_check=False,min_mem_size_GB=min_mem_size,max_mem_size_GB=max_mem_size,port=port_no) # start h2o except: logging.critical('h2o.init') h2o.download_all_logs(dirname=logs_path, filename=logfile) h2o.cluster().shutdown() sys.exit(2) # In[108]: # meta data meta_data = set_meta_data(no_rows,analysis,run_id,server_path,data_path,test_path,model_path,target,run_time,classification,scale,max_models,balance_y,balance_threshold,name,run_dir,nthreads,min_mem_size,orig_path) # In[109]: # predictions only if model_path is not None: