def process_connection(self, conn):
    '''
    Handle one request from a connected cache-server client.

    Reads a single JSON-encoded command dict from 'conn', executes it and
    writes the text response back on the same connection.  The command is
    selected by key presence in the dict:

      - "get_creds": reply with self.creds[team_name]
      - "set_creds": store the supplied value in self.creds[team_name], reply "OK"
      - "terminate": set self.terminate to True, reply "OK"

    Args:
        conn: connected socket-like object providing recv() and sendall().

    Nothing is sent back when the peer delivers no data.
    '''
    console.print("new connection established")

    data = conn.recv(16000)
    #console.print("data=", data)
    if not data:
        # peer closed the connection without sending a command
        return

    # decode command
    text = data.decode()
    cd = json.loads(text)

    team_name = cd["team_name"] if "team_name" in cd else None
    first_key = next(iter(cd))
    console.print("cmd: {}".format(first_key))

    if "get_creds" in cd:
        response = self.creds[team_name]
    elif "set_creds" in cd:
        self.creds[team_name] = cd["set_creds"]
        response = "OK"
    elif "terminate" in cd:
        self.terminate = True
        response = "OK"
    else:
        # NOTE(review): this relies on a module named `error` (singular) being
        # in scope and on internal_error() raising; otherwise 'response' is
        # unbound below -- confirm both against this file's imports.
        error.internal_error(
            "unrecognized cmd received by xt_cache_server: {}".format(cd))

    byte_buff = response.encode()

    # send() may transmit only part of the buffer; sendall() keeps writing
    # until the whole response is out (or raises), so replies are not truncated.
    conn.sendall(byte_buff)
def set_timer(timeout):
    '''
    Sleep for self.timeout, then close all plots with plt.close("all").

    NOTE(review): the 'timeout' argument is never read; the delay is taken
    from 'self.timeout' of the enclosing scope -- confirm this is intended.
    '''
    delay = self.timeout
    console.print("set_timer called: timeout=", delay)

    time.sleep(delay)
    console.diag("timer triggered!")

    plt.close("all")
    print("closed all plots and the fig")
def listen_for_commands(self):
    '''
    Serve cache-server clients until self.terminate becomes true.

    Binds a TCP socket to (HOST, CACHE_SERVER_PORT), then accepts connections
    one at a time and hands each to self.process_connection().  Any exception
    raised while processing a connection is logged and printed, and the loop
    continues with the next client.

    NOTE(review): when self.use_ssl is true the code refers to 'context', but
    the statements that would create it (ssl.create_default_context /
    set_ciphers, guarded by an FN_CERT existence check) were commented out in
    the original -- confirm where 'context' is expected to come from.
    '''
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as normal_sock:
        sock = normal_sock
        if self.use_ssl:
            sock = context.wrap_socket(normal_sock,
                                       server_hostname=HOST,
                                       ca_certs="server.crt",
                                       cert_reqs=ssl.CERT_REQUIRED)

        # Free up the port for reuse if the process is killed
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((HOST, CACHE_SERVER_PORT))
        sock.listen()

        console.print("waiting for client input...")

        while not self.terminate:
            conn, _addr = sock.accept()

            with conn:
                try:
                    self.process_connection(conn)
                except BaseException as err:
                    # keep serving: report the failure and wait for the next client
                    logger.exception(
                        "Error during communication in xt_server, ex={}".format(err))
                    console.print("exception: " + str(err))
def get_status(self, fn_entry):
    '''
    Classify the entry 'fn_entry' as "queued", "running" or "completed".

    - "queued": an "ls -lt" of the entry's path under self.psm_queue_path
      (run through self.run_cmd) mentions the entry name.
    - "running": the entry is the one reported by get_running_entry_name()
      and either the runner-script or the controller process id is found.
    - "completed": everything else (the fallback).
    '''
    fn_queue_entry = file_utils.path_join(self.psm_queue_path, fn_entry,
                                          for_windows=False)
    listing = self.run_cmd("ls -lt " + fn_queue_entry)

    if listing and fn_entry in listing:
        return "queued"

    current = self.get_running_entry_name()
    if current != fn_entry:
        console.print("PSM current job:", current)
        return "completed"

    # entry might be running; is the runner script OR controller active?
    if self._get_runner_script_process_id() or self._get_controller_process_id():
        return "running"

    console.print("--> runner script and controller processes not running")
    return "completed"
def dos2unix(self, name):
    '''
    Rewrite the text file 'name' in place without carriage-return characters.

    The file is read in text mode, every "\\r" left in the text is dropped,
    and the result is written back with newline="" so that open() does not
    translate the line endings on output.
    '''
    with open(name, "rt") as src:
        cleaned = src.read().replace("\r", "")

    # specify newline="" here to prevent open() from messing with our newlines
    with open(name, "wt", newline="") as dst:
        dst.write(cleaned)

    console.print("CR characters removed: {}".format(name))
def run_scp_cmd(caller, scp_parts, report_error=True):
    '''
    Run "scp -i <private key>" followed by the arguments in 'scp_parts'.

    Args:
        caller: not used by this function.
        scp_parts: list of additional scp arguments (source, destination, ...).
        report_error: when true, a non-zero exit code prints the captured
            output and raises an Exception.

    Returns:
        (exit_code, output) as returned by sync_run().

    Raises:
        Exception: scp exited with a non-zero code and report_error is true.
    '''
    import os   # local import so this block does not rely on a module-level one

    # The command runs without a shell, so "~" in the key path is never
    # expanded for us; expand it here, as scp_copy_file_to_box() does.
    key_path = os.path.expanduser(constants.LOCAL_KEYPAIR_PRIVATE)
    cmd_parts = ["scp", "-i", key_path] + scp_parts

    console.print(" running SCP cmd: {}".format(" ".join(cmd_parts)))

    exit_code, output = sync_run(cmd_parts)
    if report_error and exit_code:
        console.print(output)
        raise Exception("scp copy command failed")

    return exit_code, output
def _send_cmd_to_cache_server(self, cmd_dict, max_retries, can_start_server):
    '''
    Send 'cmd_dict' (as JSON) to the XT cache server and return its reply.

    Up to 'max_retries' attempts are made, one second apart, to cover the case
    where the cache server is still being started.  If the first attempt fails
    and 'can_start_server' is true, self._start_xt_cache_server() is called
    before retrying.

    Returns:
        The decoded response text, or None when every attempt failed.

    NOTE(review): when self.use_ssl is true the code refers to 'context', whose
    creation (ssl.create_default_context / set_ciphers) was commented out in the
    original; on that path each attempt fails and None is returned.
    '''
    for i in range(max_retries):
        try:
            byte_buffer = json.dumps(cmd_dict).encode()

            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as normal_sock:
                if self.use_ssl:
                    sock = context.wrap_socket(normal_sock,
                                               server_hostname=HOST,
                                               ca_certs="server.crt",
                                               cert_reqs=ssl.CERT_REQUIRED)
                else:
                    sock = normal_sock

                sock.connect((HOST, CACHE_SERVER_PORT))

                # send cmd_dict as bytes
                sock.sendall(byte_buffer)

                # read response
                data = sock.recv(16000)
                return data.decode()

        except Exception:
            # Only ordinary errors are retried.  KeyboardInterrupt / SystemExit
            # derive from BaseException and now propagate, so the user can
            # cancel with Ctrl-C instead of waiting out every retry.
            if i == 0 and can_start_server:
                # first try failed; try starting the server
                self._start_xt_cache_server()

            if i > 0:
                # we are retrying some error after trying to start the server
                console.print(".", end="", flush=True)

            time.sleep(1)

    # retries exhausted; deliberately not logged since it shows up to the
    # user as a confusing message
    return None
def sync_run(cmd_parts, capture_output=True, shell=False, report_error=False, env_vars=None, capture_as_bytes=False):
    '''
    Synchronously run the specified cmd/app in the current working directory.
    (Per the original note: the target app MUST be a fully qualified path.)

    Args:
        cmd_parts: list of command parts, or a single command string.
        capture_output: when true, stdout and stderr are captured together.
        shell: passed through to subprocess.run().
        report_error: when true, a non-zero exit code prints the output and raises.
        env_vars: environment passed to the child process (None = inherit).
        capture_as_bytes: when true, captured output is returned as raw bytes;
            otherwise it is decoded as UTF-8, stripped of "\\r" and passed
            through filter_out_verbose_lines().

    Returns:
        (exit_code, output); output is None when capture_output is false.

    Raises:
        Exception: exit code is non-zero and report_error is true.
    '''
    console.diag("sync_run: {}".format(cmd_parts))

    # Without a shell, linux won't accept a command string, only cmd parts.
    # With shell=True the string must stay intact: on POSIX a list combined
    # with shell=True runs only its first item as the command and hands the
    # rest to the shell itself as arguments.
    if isinstance(cmd_parts, str) and not shell:
        cmd_parts = cmd_parts.split(" ")

    if capture_output:
        process = subprocess.run(cmd_parts,
                                 cwd=".",
                                 env=env_vars,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=False,
                                 shell=shell)
        output = process.stdout

        if not capture_as_bytes:
            # universal_newlines=False gives us bytes, so map them to str
            output = output.decode("utf-8", errors='backslashreplace').replace('\r', '')
            output = filter_out_verbose_lines(output)
    else:
        process = subprocess.run(cmd_parts, cwd=".", env=env_vars, shell=shell)
        output = None

    exit_code = process.returncode

    if report_error and exit_code:
        console.print(output)
        raise Exception("sync run failed, exit code={}, error={}".format(
            exit_code, output))

    return exit_code, output
def single_char_input(prompt=None, end="\n"):
    '''
    Wait for a single key press and return it.

    Args:
        prompt: optional text printed (without newline) before waiting.
        end: text printed after the key has been read; nothing is printed
            when it is empty/None.

    Returns:
        The value from KeyPressChecker.getch_wait(), or constants.CONTROL_C
        when the wait is interrupted by KeyboardInterrupt.
    '''
    if prompt:
        console.print(prompt, end="", flush=True)

    try:
        with KeyPressChecker() as checker:
            key = checker.getch_wait()
    except KeyboardInterrupt:
        key = constants.CONTROL_C

    if end:
        console.print(end, end="")

    return key
def scp_copy_file_to_box(caller, box_addr, fn_local, box_fn, report_error=True):
    '''
    Copy the local file 'fn_local' to 'box_fn' on 'box_addr' using scp.

    Args:
        caller: not used by this function.
        box_addr: address of the target box (left side of "addr:path").
        fn_local: path of the local file to copy.
        box_fn: destination path on the box.
        report_error: when true, a non-zero exit code prints the captured
            output and raises an Exception.

    Returns:
        (exit_code, output) as returned by sync_run().
    '''
    key_file = os.path.expanduser(constants.LOCAL_KEYPAIR_PRIVATE)
    target = "{}:{}".format(box_addr, box_fn)
    cmd_parts = ["scp", "-i", key_file, fn_local, target]

    console.diag(" copying script to box; cmd={}".format(cmd_parts))

    exit_code, output = sync_run(cmd_parts)
    failed = bool(exit_code)

    if failed and report_error:
        console.print(output)
        raise Exception("scp copy command failed: {}".format(output))

    return exit_code, output
def report(self):
    '''
    Print a summary of the runs and runsets held by this object.

    Also refreshes self.max_runs_per_runset with the largest number of runs
    found in any single runset (0 when there are no runsets).
    '''
    run_counts = [len(runset.runs) for runset in self.runsets]

    self.max_runs_per_runset = max(run_counts, default=0)
    single_run_count = run_counts.count(1)

    console.print("{} runs".format(len(self.runs)))
    console.print("{} runsets".format(len(self.runsets)))
    console.print("{} have 1 run".format(single_run_count))
    console.print("{} max runs per runset".format(self.max_runs_per_runset))
def _load_node_creds(self):
    '''
    Load credentials from the XT_STORE_CREDS / XT_MONGO_CONN_STR env vars.

    Both values are passed through utils.base64_to_text().  When both are
    present (the "XT client on compute node" case), a creds JSON limited to
    Store access [storage + mongo] is built and handed to self.apply_creds().

    Returns:
        True when creds were applied from the env vars, False otherwise.
    '''
    sc = utils.base64_to_text(os.getenv("XT_STORE_CREDS"))
    mc = utils.base64_to_text(os.getenv("XT_MONGO_CONN_STR"))

    if not (sc and mc):
        return False

    # XT client on compute node
    # cleanup (for testing from client): unescape quotes, drop enclosing quotes
    sc = sc.replace('\\"', '"')
    mc = mc.replace('\\"', '"')
    if mc.startswith('"'):
        mc = mc[1:-1]

    store_info = json.loads(sc)
    store_name = store_info["name"]
    store_key = store_info["key"]

    # sample mc: mongodb://<account-name>:<key>...
    _scheme, remainder = mc.split("://", 1)
    mc_name, _ = remainder.split(":", 1)

    # creds are limited in this case to just Store access [storage + mongo]
    self.apply_creds(json.dumps({store_name: store_key, mc_name: mc}))

    console.print(
        "init_creds: using compute node ENV VAR settings for store={}, mongo={}"
        .format(store_name, mc_name))

    return True
def console_callback(run_name, msg):
    '''
    Handle one console message from an attached run.

    A message starting with constants.APP_EXIT_MSG carries the final status
    after its first ":"; its description is printed and
    context.remote_app_is_running is cleared.  Any other message is echoed
    as-is (prefixed with the run name when show_run_name is set).

    Relies on 'self', 'context' and 'show_run_name' from the enclosing scope.
    '''
    if not msg.startswith(constants.APP_EXIT_MSG):
        line = (run_name + ": " + msg) if show_run_name else msg
        console.print(line, end="", flush=True)
        sys.stdout.flush()
        return

    status = msg.split(":")[1].strip()
    console.print(self.status_to_desc(run_name, status), flush=True)
    context.remote_app_is_running = False
def fixup_jobs_if_needed(client):
    '''
    Upgrade old-format records of the "__jobs__" collection, batch by batch.

    Repeatedly fetches up to 'batch_size' records that have a "job_id" but no
    "job_num" and passes them to process_job_batch(), until none remain.
    Progress and a final summary are printed when anything was updated.
    '''
    collection = client["__jobs__"]
    updated_count = 0

    while True:
        # build next batch of original records where JOB_ID is defined but JOB_NUM is not
        query = {"job_id": {"$exists": True}, "job_num": {"$exists": False}}
        fields = {"_iid": 1}
        records = list(collection.find(query, fields).limit(batch_size))

        if not records:
            break

        if not updated_count:
            console.print("found mongo-db JOB records written by older version of XT; upgrading them to new format...")

        process_job_batch(collection, records)
        updated_count += len(records)
        console.print("update progress=", updated_count)

    if updated_count:
        console.print("upgrade complete (updated {:,} records)".format(updated_count))
def fixup_runs_if_needed(client, workspace):
    '''
    Upgrade old-format run records of the 'workspace' collection, batch by batch.

    Repeatedly fetches up to 'batch_size' records that have a "run_name" and a
    "run_num" equal to 0 and passes them to process_run_batch(), until none
    remain.  Progress and a final summary are printed when anything was updated.
    '''
    collection = client[workspace]
    updated_count = 0

    while True:
        # get next batch of original records
        query = {"run_name": {"$exists": True}, "run_num": 0}
        fields = {"_iid": 1}
        records = list(collection.find(query, fields).limit(batch_size))

        if not records:
            break

        if not updated_count:
            console.print("found mongo-db RUN records written by older version of XT; upgrading them to new format...")

        process_run_batch(collection, records)
        updated_count += len(records)
        console.print("update progress=", updated_count)

    if updated_count:
        console.print("upgrade complete (updated {:,} records)".format(updated_count))
def attach_task_to_console(self, ws_name, run_name, show_waiting_msg=False, show_run_name=False, escape=0): full_run_name = ws_name + "/" + run_name # callback for each console msg from ATTACHED task def console_callback(run_name, msg): if msg.startswith(constants.APP_EXIT_MSG): #console.print(msg) status = msg.split(":")[1].strip() desc = self.status_to_desc(run_name, status) console.print(desc, flush=True) context.remote_app_is_running = False else: if show_run_name: console.print(run_name + ": " + msg, end="", flush=True) else: console.print(msg, end="", flush=True) sys.stdout.flush() # RPYC bug workaround - callback cannot write to variable in its context # but it CAN write to an object's attribute context = Bag() context.remote_app_is_running = True show_detach_msg = False detach_requested = False attached, status = self.xtc.attach(ws_name, run_name, console_callback) #console.print("attached=", attached, ", status=", status) if attached: #if show_waiting_msg: # console.print("\n<attached: {}>\n".format(full_run_name)) started = time.time() timeout = escape if timeout: timeout = float(timeout) try: with KeyPressChecker() as checker: # ATTACH LOOP #console.print("entering ATTACH WHILE LOOP...") while context.remote_app_is_running: #console.print(".", end="") #sys.stdout.flush() if checker.getch_nowait() == 27: detach_requested = True break time.sleep(.1) if timeout: elapsed = time.time() - started if elapsed >= timeout: break except KeyboardInterrupt: detach_requested = True finally: self.xtc.detach(ws_name, run_name, console_callback) if detach_requested or show_waiting_msg: console.print( "\n<detached from run: {}>".format(full_run_name)) else: desc = self.status_to_desc(run_name, status) console.print(desc)
def hex_dump(fn):
    '''
    Print a hex dump of the file 'fn': 16 bytes per row, each row starting
    with the 4-digit hex offset and ending with the text from get_nice_text().

    Handles an empty file, and a final partial row of any length (including a
    single byte), padding that row so its text column lines up with full rows.
    '''
    bytes_per_row = 16
    cell_fmt = "{:02x} "
    cell_width = len(cell_fmt.format(0))

    console.print("hex dump of: {}\n".format(fn))

    # read file raw
    with open(fn, "rb") as infile:
        byte_buff = infile.read()

    start_index = 0
    addr = 0
    console.print("{:04x} ".format(addr), end="")

    for i, value in enumerate(byte_buff):
        console.print(cell_fmt.format(value), end="")

        if (i + 1) % bytes_per_row == 0:
            # row complete: emit its text (end index is inclusive), start next row
            text = get_nice_text(byte_buff, start_index, i)
            console.print(" " + text)

            addr += bytes_per_row
            console.print("{:04x} ".format(addr), end="")
            start_index = i + 1

    # print text of the last (partial) row, if any bytes are left over
    last_index = len(byte_buff) - 1
    if start_index <= last_index:
        text = get_nice_text(byte_buff, start_index, last_index)
        missing = bytes_per_row - (len(byte_buff) % bytes_per_row)
        padding = " " * (cell_width * missing)
        console.print(padding + " " + text)
    else:
        # nothing left over: just terminate the dangling offset line
        console.print("")
# # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. # # aml_shim.py: AML wants to run a python script, so we use this to launch our shell script import sys import os from xtlib import console # MAIN code args = sys.argv[1:] console.print("aml_shim: args=", args) cmd = args[ 0] # all are passed as a logical string (but args[1] is "1", so don't use that) console.print("aml_shim: about to run cmd=", cmd) os.system(cmd)
# RUN command os.chdir(cwd) fb.reset_feedback() xt_cmds.main(cmd_text, cmd_started) break # main code pid = sys.argv[1] if len(sys.argv) > 1 else None if pid: pid = int(pid) # kill old process before we try to own resources console.print("canceling old version of server: pid=", pid) p = psutil.Process(pid) p.terminate() time.sleep( 2) # wait for job to fully terminate so we can access its resources xtlib_dir = os.path.realpath(os.path.dirname(__file__)) #console.print("xtlib_dir=", xtlib_dir) worker = WatchWorker(xtlib_dir + "/**") worker.start() with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind((HOST, PORT)) s.listen()
def _get_creds_from_login(self, authentication, reason=None):
    '''
    Authenticate interactively with Azure and load the XT creds from key vault.

    Args:
        authentication: "browser", "device-code", or "auto" ("browser" when
            pc_utils.has_gui() is true, else "device-code").
        reason: not used by this function.

    Returns:
        self.keys serialized as a JSON string, after adding the
        "xt_server_cert" and "object_id" entries.

    NOTE(review): credential.get_token() is called without any scope
    argument -- confirm this is accepted by the azure.identity version in use.
    '''
    # use normal Key Value
    from azure.keyvault.secrets import SecretClient

    if authentication == "auto":
        authentication = "browser" if pc_utils.has_gui() else "device-code"

    if authentication == "browser":
        console.print("authenticating with azure thru browser... ", flush=True, end="")
        from azure.identity import InteractiveBrowserCredential

        # tenant_id is passed only when one is configured
        options = {}
        if self.azure_tenant_id is not None:
            options["tenant_id"] = self.azure_tenant_id
        credential = InteractiveBrowserCredential(**options)

    elif authentication == "device-code":
        from azure.identity import DeviceCodeCredential
        from azure.identity._constants import AZURE_CLI_CLIENT_ID

        console.print(
            "using device-code authorization (Azure AD currently requires 2-4 authenications here)"
        )

        options = {"client_id": AZURE_CLI_CLIENT_ID}
        if self.azure_tenant_id is not None:
            options["tenant_id"] = self.azure_tenant_id
        credential = DeviceCodeCredential(**options)

    else:
        errors.syntax_error(
            "unrecognized authentication type '{}'".format(authentication))

    token = credential.get_token().token

    # get keys from keyvault
    self.client = SecretClient(self.vault_url, credential=credential)
    key_text = self.get_secret_live("xt-keys")
    console.print("authenticated successfully", flush=True)

    xt_server_cert = self.get_secret_live("xt-servercert")

    # write all our creds to self.keys
    self.apply_creds(key_text)
    self.keys["xt_server_cert"] = xt_server_cert
    self.keys["object_id"] = self.get_me_graph_property(token, "id")

    # return creds as json string
    return json.dumps(self.keys)
def monitor_attach_run(self, ws, run_name, show_waiting_msg=True, escape=0): console.print( "") # separate the waiting loop output from previous output attach_attempts = 0 def monitor_work(): nonlocal attach_attempts connected = self.xtc.connect() #azure_task_state, connected, box_name, job_id = self.connect_to_box_for_run(ws, run_name) azure_task_state = None box_name = self.xtc.box_name job_id = "xxxxx" # TODO attach_attempts += 1 if azure_task_state: #console.print("azure_task_state=", azure_task_state) # its an azure-batch controlled run if azure_task_state == "active": text = "Waiting for run to start: {} ({} in azure-batch)".format( run_name.upper(), job_id) elif azure_task_state == "running" and not connected: text = "Waiting for run to initialize: {} ({} in azure-batch)".format( run_name.upper(), job_id) else: # exit monitor loop return azure_task_state, connected, box_name, job_id, attach_attempts else: # its a normal box-controller run if not connected: errors.env_error("could not connect to box: " + box_name) # we are connected, but has run started yet? status_dict = self.xtc.get_status_of_runs(ws, [run_name]) # controller may not have heard of run yet (if we were fast) status = status_dict[ run_name] if run_name in status_dict else "created" if status in ["created", "queued"]: text = "Waiting for run to start: {} (queued to run on {})".format( run_name.upper(), box_name) else: # status is one of running, killed, completed, spawning, ... 
# exit monitor loop return azure_task_state, connected, box_name, job_id, attach_attempts return text # wait for run to be attachable in a MONITOR LOOP result = monitor_loop(True, monitor_work, "[hit ESCAPE to detach] ", escape) #console.print("") # separate the waiting loop output from subsequent output if result: state, connected, box_name, job_id, attach_attempts = result #console.print("state=", state, ", connected=", connected, ", box_name=", box_name, ", job_id=", job_id) if not connected: if False: # attach_attempts == 1: errors.user_exit( "Unable to attach to run (state={})".format(state)) else: # not an error in this case console.print( "Unable to attach to run (state={})".format(state)) return console.print("<attaching to: {}/{}>\n".format(ws, run_name)) self.attach_task_to_console(ws, run_name, show_waiting_msg=show_waiting_msg, escape=escape) else: # None returned; user cancelled with ESCAPE, so no further action needed pass
def report(self):
    '''
    Print "<name> = <values>" for this object, followed by its default
    setting's value when a default setting is defined.
    '''
    pieces = ["{} = {}".format(self.name, self.values)]

    default = self.default_setting
    if default is not None:
        pieces.append(" # default {}".format(default.value))

    console.print("".join(pieces))
def choose_config(self):
    '''
    Choose the runset (hyperparameter configuration) to run next.

    - With no runsets yet: build a random configuration over the
      multi-valued hparams (drawn from all settings, or from the default
      setting +/- one when the hparam has a default) and return it.
    - Otherwise: take the runset with the highest metric (the later one wins
      a tie), build its neighborhood (one setting step down/up per hparam)
      and draw one member, weighted by how many runs it is short of the
      neighborhood's ceiling.

    Side effects: refreshes self.multivalued_hparams, assigns 'id' on the
    neighborhood runsets, and prints a report of the choice.
    '''
    # Gather the subset of hparams with multiple values.
    self.multivalued_hparams = [hp for hp in self.hparams if hp.has_multiple_values]

    # If there are no runs yet, just return a random configuration.
    if len(self.runsets) == 0:
        pairs = []
        for hparam in self.multivalued_hparams:
            last_setting_id = len(hparam.settings) - 1
            default = hparam.default_setting

            if default is None:
                # Select from all settings.
                low, high = 0, last_setting_id
            else:
                # Select from the default setting, +/- one.
                center = default.id
                low = center - 1 if center > 0 else center
                high = center + 1 if center < last_setting_id else center

            pairs.append((hparam.id, dgd_rand.randint(low, high)))

        chosen_runset = RunSet(pairs, str(pairs))
        chosen_runset.report('Random runset ')
        return chosen_runset

    # Find the best runset so far.
    best_runset = self.runsets[0]
    best_metric = best_runset.metric
    for candidate in self.runsets:
        if candidate.metric >= best_metric:
            best_metric = candidate.metric
            best_runset = candidate
    best_runset.report('Best runset ')

    # Build a neighborhood around (and including) the best runset.
    neighborhood = [best_runset]
    for hp_i, hparam in enumerate(self.multivalued_hparams):
        best_hparam_id = best_runset.hp_id__setting_id__list[hp_i][0]
        assert hparam.id == best_hparam_id
        best_setting_id = best_runset.hp_id__setting_id__list[hp_i][1]

        # the looked-up setting itself is not used further
        _best_setting = hparam.settings[best_setting_id]

        if best_setting_id > 0:
            neighborhood.append(
                self.get_neighbor_runset(best_runset, hp_i, best_hparam_id,
                                         best_setting_id - 1))

        if best_setting_id < len(hparam.settings) - 1:
            neighborhood.append(
                self.get_neighbor_runset(best_runset, hp_i, best_hparam_id,
                                         best_setting_id + 1))

    # Choose one runset, weighted by how many runs it needs to exceed those of the runset with the most.
    ceiling = max(len(member.runs) for member in neighborhood) + 1
    console.print("ceiling = {} runs".format(ceiling))

    probs = np.array([max(0, ceiling - member.num_runs) for member in neighborhood],
                     dtype=float)
    total = np.sum(probs)
    probs /= total

    for i, member in enumerate(neighborhood):
        member.id = i
        member.report(" {:2d} prob={:6.4f}".format(member.id, probs[i]))

    chosen_runset = dgd_rand.choices(neighborhood, probs)[0]
    chosen_runset.report(' {:2d} was chosen '.format(chosen_runset.id))
    return chosen_runset
def report(self, title):
    '''
    Print 'title' followed by this runset's config string and, when a metric
    is available, the metric and the number of runs.
    '''
    pieces = ["{} {}".format(title, self.config_str)]

    if self.metric is not None:
        pieces.append(" {:12.5f}, {} runs".format(self.metric, self.num_runs))

    console.print("".join(pieces))
def monitor_loop(monitor, func, action_msg="monitoring ", escape_secs=0): ''' set up a loop to continually call 'func' and display its output, until the ESCAPE key is pressed ''' # handle the easy case first if not monitor: text = func() console.print(text, end="") return pc_utils.enable_ansi_escape_chars_on_windows_10() if monitor == True: monitor = 5 # default wait time else: monitor = int(monitor) started = datetime.datetime.now() started2 = time.time() timeout = escape_secs if timeout: timeout = float(timeout) last_result = None # MONITOR LOOP with KeyPressChecker() as checker: while True: result = func() if not isinstance(result, str): # func has decided to stop the monitor loop itself if last_result: console.print("\n") return result if last_result: # erase last result on screen console.print("\r", end="") line_count = len(last_result.split("\n")) - 1 # NOTE: on some systems, the number of lines needed to be erased seems to # vary by 1. when it is too many, it destroys prevous output/commands. until # this is corrected, we pick the lower values that will cause some extra # output on some systems. #line_count += 1 # add 1 for the \n we will use to clearn the line pc_utils.move_cursor_up(line_count, True) elapsed = utils.elapsed_time(started) result += "\n" + action_msg + "(elapsed time: {})...".format( elapsed) console.print(result, end="") sys.stdout.flush() if timeout: elapsed = time.time() - started2 if elapsed >= timeout: console.print("\nmonitor timed out") break # wait a few seconds during refresh if pc_utils.wait_for_escape(checker, monitor): console.print("\nmonitor cancelled") break last_result = result return None
def assign_settings(self, hp_config):
    '''
    Placeholder: only announces the call; 'hp_config' is not used.

    Returns:
        None
    '''
    console.print('assigning settings')
if not hparam.has_multiple_values: value = hparam.single_value if value == "$randint()": value = np.random.randint(2147483647) arg_dict[hparam.name] = value # now, output values used in runset for hp_i, hparam in enumerate(self.multivalued_hparams): hparam_id = runset.hp_id__setting_id__list[hp_i][0] assert hparam.id == hparam_id value_id = runset.hp_id__setting_id__list[hp_i][1] value = hparam.settings[value_id] arg_dict[hparam.name] = value.value return arg_dict if __name__ == '__main__': dgd = DGD(unit_test=True) console.print() for hp in dgd.hparams: hp.report() console.print() dgd.runs = [] dgd.runsets = [] dgd.configstr_runset_dict = {} chosen_runset = dgd.choose_config() chosen_runset.report("Chosen runset")
def build_data_frames(self):
    '''
    1. for each run, collect the reported metrics as metric sets (by reported col list)
    2. append to the dataframe for that col list

    Parent runs (search_style set and not "single") and runs without metric
    sets are skipped; at most self.max_runs runs are used when it is set.
    On the first usable run, self.x_col / self.col_names are resolved.

    Side effects: may update self.x_col and self.col_names; sets
    self.run_names to the runs that were used; prints a summary.

    Returns:
        dict mapping str(list of column names) -> pandas DataFrame holding the
        records of every metric set with those columns, plus the added
        "run", "node", "job", "experiment" and "workspace" columns.
    '''
    # build "data_frames"
    no_metrics = []
    pp_run_names = []
    used_max = False
    data_frames_by_cols = {}
    got_columns = False

    for record in self.run_log_records:
        # extract metrics for this run
        run = record["_id"]
        node = utils.node_id(record["node_index"])
        job = record["job_id"]
        experiment = record["exper_name"]
        workspace = record["ws"]
        search_style = utils.safe_value(record, "search_style")

        if search_style and search_style != "single":
            # parent run with children - skip it
            continue

        log_records = record["log_records"]
        metric_sets = run_helper.build_metrics_sets(log_records)

        if not metric_sets:
            no_metrics.append(run)
            continue

        if self.max_runs and len(pp_run_names) >= self.max_runs:
            used_max = True
            break

        if not got_columns:
            # set x and y columns
            explicit = qfe.get_explicit_options()
            if "x" not in explicit:
                self.x_col = self.get_actual_x_column(
                    metric_sets, self.x_col, self.col_names)

            if not self.col_names:
                # not specified by user, so build defaults
                self.col_names = self.get_default_y_columns(
                    metric_sets, self.x_col)

            got_columns = True

        # merge metric sets into dfx
        for metric_set in metric_sets:
            # create a pandas DataFrame
            df = pd.DataFrame(metric_set["records"])
            cols = str(list(df.columns))

            # NOTE(review): 'cols' is a string, so the two checks below are
            # substring matches (e.g. "step" also matches a "steps" column) --
            # confirm whether exact column-name matching was intended.

            # ensure this df has our x_col
            if self.x_col and self.x_col not in cols:
                continue

            # ensure this df has at least 1 y_col
            if not any(y in cols for y in self.col_names):
                continue

            # add run_name column (and the other identifying columns)
            row_count = df.shape[0]
            df["run"] = [run] * row_count
            df["node"] = [node] * row_count
            df["job"] = [job] * row_count
            df["experiment"] = [experiment] * row_count
            df["workspace"] = [workspace] * row_count

            if cols not in data_frames_by_cols:
                data_frames_by_cols[cols] = df
            else:
                # DataFrame.append() was removed in pandas 2.0; pd.concat()
                # gives the same row-wise result and works on old and new versions
                data_frames_by_cols[cols] = pd.concat(
                    [data_frames_by_cols[cols], df])

        pp_run_names.append(run)

    if no_metrics:
        console.print(
            "\nnote: following runs were skipped (currently have no logged metrics): \n {}\n"
            .format(", ".join(no_metrics)))

    if used_max:
        console.print(
            "plotting first {} runs (use --max-runs to override)".format(
                self.max_runs))
    else:
        console.print("plotting {} runs...".format(len(pp_run_names)))

    # update our list of run_names to process
    self.run_names = pp_run_names

    return data_frames_by_cols