def process(self):
    paths = self.args.file_paths
    # Allow additional input from pipe in
    if '-' in paths[:-1]:
        paths.remove('-')
        more_inputs = [l.strip() for l in sys.stdin if l.strip()]
        paths = more_inputs + paths
    l_r_mapping, ignored_paths = \
        File.convert_to_file_mapping(paths[:-1], paths[-1])
    if self.args.dry_run:
        print_info('[DRY RUN] Files to be uploaded:')
        for l, r in l_r_mapping:
            print('{} => {}'.format(l, r))
        if ignored_paths:
            print_warn('[DRY RUN] Files to be ignored (inaccessible):')
            for l in ignored_paths:
                print(l)
    else:
        result = File.upload(l_r_mapping)
        if self.args.fileset:
            r = result.as_new_file_set(self.args.fileset)
            print(r)
        print("results:")
        print(result)
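# Hedged illustration (not part of the command class above): how the '-' placeholder merges
# piped-in paths with the positional arguments before the mapping step. The helper name and
# the shell example are hypothetical.
def _merge_stdin_paths(paths, stdin_lines):
    """Return (local_sources, remote_destination) with '-' expanded from stdin lines."""
    paths = list(paths)
    if '-' in paths[:-1]:
        paths.remove('-')
        extra = [line.strip() for line in stdin_lines if line.strip()]
        paths = extra + paths
    return paths[:-1], paths[-1]

# e.g. `printf 'a.csv\nb.csv\n' | <cli> upload - /remote/dir` behaves like:
# _merge_stdin_paths(['-', '/remote/dir'], ['a.csv\n', 'b.csv\n'])
# -> (['a.csv', 'b.csv'], '/remote/dir')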
def retrieve_result(self, node):
    # Make tmp dir for downloaded result
    tmp_dir = "result_tmp"
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    # Download results
    node_name = node.node_name
    results = {}
    for ver in self.node_versions[node_name]:
        if self.local:
            local_path = "{}/{}:{}/{}_output/{}.pkl".format(
                MOCK_PATH, node_name, ver, node_name, node_name)
        else:
            remote_path = "{}_output/{}.pkl:{}".format(node_name, node_name, ver)
            local_path = "{}/{}.pkl".format(tmp_dir, node_name)
            File.download({remote_path: local_path})
        rst = pkl.load(open(local_path, "rb"))
        # Keep only plain metric values; iterate over a copy of the keys so that
        # entries can be popped while scanning.
        for metric in list(rst):
            if not isinstance(rst[metric], (int, float, list, dict)):
                rst.pop(metric, None)
        results[ver] = rst
    self.log_manager.save_result(node_name, results, self.workspace)
    if os.path.exists(tmp_dir):
        import shutil
        shutil.rmtree(tmp_dir)
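# Hedged illustration of the metric filtering above: the result pickle is assumed to be a
# plain dict, and any value that is not an int/float/list/dict (e.g. a model object) is
# dropped before the results reach the log manager. The helper is hypothetical.
def _filter_metrics(rst):
    return {k: v for k, v in rst.items() if isinstance(v, (int, float, list, dict))}

# _filter_metrics({"acc": 0.91, "loss_curve": [0.4, 0.3], "model": object()})
# -> {"acc": 0.91, "loss_curve": [0.4, 0.3]}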
def build_scripts(self, node):
    node_name = node.node_name
    script_path = node.script_path[:-3]
    script_name = script_path.split('/')[-1]
    local_generated_script_name = "{}_{}.py".format(self.workspace, node_name)
    cloud_generated_script_name = "_{}.py".format(node_name)
    if os.path.exists(local_generated_script_name):
        return
    fs = open(local_generated_script_name, "w")
    # Import necessary function & tool
    fs.write("from {} import {}\n".format('.'.join(script_path.split('/')), script_name))
    fs.write("import argparse\n")
    fs.write("import pickle as pkl\n")
    fs.write("import os\n")
    # Build argument parser
    fs.write("parser=argparse.ArgumentParser()\n")
    for hp in node.hyper_parameter:
        fs.write("parser.add_argument('--{}', type={})\n".format(
            hp['name'], 'float' if hp['type'] == 'float' else 'str'))
    fs.write("args=parser.parse_args()\n")
    # Get input data and hyper parameters
    fs.write("inputs=dict()\n")
    for in_node in node.input_nodes:
        fs.write("inputs['{}']=pkl.load(open('{}_output/{}.pkl', 'rb'))\n"
                 .format(in_node.node_name, in_node.node_name, in_node.node_name))
    fs.write("hps=dict()\n")
    for hp in node.hyper_parameter:
        fs.write("hps['{}']=args.{}\n".format(hp['name'], hp['name']))
    # Call function
    fs.write("rst={}(inputs, hps)\n".format(script_name))
    # Save the result
    fs.write("if not os.path.exists('{}_output'):\n".format(node_name))
    fs.write("\tos.mkdir('{}_output')\n".format(node_name))
    fs.write("pkl.dump(rst, open('{}_output/{}.pkl', 'wb'))\n".format(node_name, node_name))
    # Compress the script and submit to ACAI system
    fs.close()
    if self.local:
        return
    # zip generated script and upload
    local_zip_filename = "{}_{}.zip".format(self.workspace, node_name)
    cloud_zip_filename = "_{}.zip".format(node_name)
    print(local_generated_script_name, cloud_generated_script_name,
          local_zip_filename, cloud_zip_filename)
    with ZipFile(local_zip_filename, "w") as zipf:
        zipf.write(local_generated_script_name, cloud_generated_script_name)
    File.upload([(local_zip_filename, cloud_zip_filename)])
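# Hedged illustration of what build_scripts() emits. For a hypothetical node "train" with
# script "scripts/train.py", one float hyper parameter "lr", and one input node
# "preprocess", the generated wrapper would look roughly like (the node/script names are
# made up; the mkdir line is indented with a tab in the generated file):
#
#   from scripts.train import train
#   import argparse
#   import pickle as pkl
#   import os
#   parser=argparse.ArgumentParser()
#   parser.add_argument('--lr', type=float)
#   args=parser.parse_args()
#   inputs=dict()
#   inputs['preprocess']=pkl.load(open('preprocess_output/preprocess.pkl', 'rb'))
#   hps=dict()
#   hps['lr']=args.lr
#   rst=train(inputs, hps)
#   if not os.path.exists('train_output'):
#       os.mkdir('train_output')
#   pkl.dump(rst, open('train_output/train.pkl', 'wb'))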
def process(self):
    if self.args.action == 'ls':
        if not self.args.directory:
            self.args.directory = '/'
        r = File.list_dir(self.args.directory)
        for d in r:
            path = d['path']
            if d['is_dir']:
                path += '/'
            print(path)
    elif self.args.action == 'versions':
        r = File.list_file_versions(self.args.file)
        for path in r:
            print(path)
    elif self.args.action == 'tag':
        MetaCommand.tag(MetaCommand.EntityType.FILE, self.args)
    elif self.args.action == 'untag':
        MetaCommand.untag(MetaCommand.EntityType.FILE, self.args)
    elif self.args.action == 'find':
        MetaCommand.find(MetaCommand.EntityType.FILE, self.args)
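# Hedged usage sketch (the console entry point isn't shown in this file): assuming the
# command is exposed as a `file` subcommand, the dispatch above maps to invocations like:
#   <cli> file ls /data                 # list a directory
#   <cli> file versions /data/a.csv     # list versions of one file
#   <cli> file tag|untag|find ...       # delegated to MetaCommand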
def list_dir(dir_path, with_meta: bool):
    if not dir_path:
        # Nothing given, list root directory.
        dir_path = '/'
    r = File.list_dir(dir_path)
    explicit_paths = []
    for d in r:
        if d['is_dir']:
            path = d['path'] + '/'
            explicit_paths.append(path)
        else:
            versioned_path = ':'.join([d['path'], str(d['version'])])
            if dir_path == '/':
                dir_path = ''
            explicit_paths.append(os.path.join(dir_path, versioned_path))
    ListCommand._print_files(dir_path, explicit_paths, with_meta=with_meta)
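# Hedged illustration of the listing shape built above: directories keep a trailing slash,
# files are joined with the listed directory and suffixed with ':<version>'. Example
# entries (made up):
#   /data/raw/
#   /data/notes.txt:3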
def load_graph(self):
    # Read config YAML file
    with open(self.workspace + "config.yaml", 'r') as stream:
        config = yaml.safe_load(stream)
    # read version history from disk
    history = {}
    if self.is_exist('_history.yaml'):
        with open(self.workspace + "_history.yaml", 'r') as stream:
            history = yaml.safe_load(stream)
    # read credentials
    project_name = config['pipeline']['project_name']
    user_name = config['pipeline']['user_name']
    admin_token = config['pipeline']['admin_token']
    # create project / user if this is first time
    if not history:
        r = Project.create_project(project_name, admin_token, 'proj_admin')
        r = Project.create_user(project_name, r['project_admin_token'], user_name)
        history['credentials'] = {
            'ACAI_PROJECT': project_name,
            'ACAI_TOKEN': r['user_token']
        }
    # update environment var with credentials
    for key in history['credentials']:
        os.environ[key] = history['credentials'][key]
    # get from history: last snapshot of workspace: file modified times
    file_versions = {}
    if 'file_versions' in history:
        # {filepath: {"time": latest modified time, "version": version}} dict
        file_versions = history['file_versions']
    # get current file modified times, figure out set of modified files,
    # and update the versions map
    curr_files = self.list_all_file_paths(self.workspace)
    modified = set()
    for curr_file in curr_files:
        curr_mod_time = self.get_modified_time(curr_file)
        if curr_file not in file_versions or curr_mod_time != file_versions[curr_file]['time']:
            # curr_file is modified
            modified.add(curr_file)
            # update versions map
            curr_version = 0 if curr_file not in file_versions \
                else file_versions[curr_file]['version'] + 1
            file_versions[curr_file] = {
                "time": curr_mod_time,
                "version": curr_version
            }
    # figure out set of needed files for current run
    needed = set()
    for module in config['modules']:
        script_path = module['script']
        needed.add(script_path)
        dependencies = {} if 'dependencies' not in module else module['dependencies']
        for path in dependencies:
            for needed_file in self.list_all_file_paths(path):
                needed.add(needed_file)
    # for a file that is both needed for current run and has been modified from last snapshot, upload it
    if not self.local_mode:
        for needed_file in needed:
            if needed_file in modified:
                input_dir = os.path.join(self.workspace, needed_file)
                print("uploading: " + needed_file)
                File.upload([(input_dir, needed_file)], []).as_new_file_set(needed_file)
    # load script versions to prepare for subsequent stage of generating "compute nodes";
    # note that a script version depends on both the script file version and the
    # dependent file versions
    script_versions = {}
    if 'script_versions' in history:
        # {node_name: {"version": version, "script_path": script_path}}
        script_versions = history['script_versions']
    # init optimizer info in case this is an optimization job
    optimization_info = {
        "result_node": None,  # pointer to result node in computation graph
        "search": "grid",     # "grid" or "bayesian"
        "metric": "",         # name of the optimization metric
        "direction": "max",   # "max" or "min", direction for metric optimization
    }
    # create compute nodes
    compute_nodes = {}
    uploaded = set()
    for module in config['modules']:
        node_name = module['node']
        script_path = module['script']
        # compute script version:
        if node_name not in script_versions:
            script_versions[node_name] = {
                "version": 0,
                "script_path": script_path
            }
        else:
            print(script_versions)
            past_version = script_versions[node_name]["version"]
            past_script = script_versions[node_name]["script_path"]
            if script_path != past_script:
                script_versions[node_name] = {
                    "version": past_version + 1,
                    "script_path": script_path
                }
            else:
                # get all needed files (one script file + optional data files) for this node
                needed_files = [script_path]
                dependencies = {} if 'dependencies' not in module else module['dependencies']
                for path in dependencies:
                    needed_files += self.list_all_file_paths(path)
                # if any of the needed files is in modified set, increment script version by 1
                for needed_file in needed_files:
                    if needed_file in modified:
                        script_versions[node_name]["version"] += 1
                        break
        # continue building the new node
        hyperparams = {} if 'params' not in module else module['params']
        input_nodes = [] if 'input_nodes' not in module else module['input_nodes']
        newnode = Node(
            node_name=node_name,
            script_path=script_path,
            script_version=script_versions[node_name]["version"],
            input_nodes=input_nodes,
            output_nodes=[],
            dependencies=[] if 'dependencies' not in module else module['dependencies'],
            hyperparams=hyperparams,
            isResult=('optimize' in module))
        compute_nodes[node_name] = newnode
        # check if this node is the one to optimize
        if 'optimize' in module:
            opt = module['optimize'][0]
            optimization_info['result_node'] = newnode
            optimization_info["search"] = opt['search'].lower()
            optimization_info["metric"] = opt['metric']
            optimization_info["direction"] = opt['direction'].lower()
    # save new history dict to disk
    history['script_versions'] = script_versions
    history['file_versions'] = file_versions
    with io.open(self.workspace + '_history.yaml', 'w+', encoding='utf8') as outfile:
        yaml.dump(history, outfile, default_flow_style=False, allow_unicode=True)
    # second pass to connect in/out edges between nodes
    graph = []
    for node_name in compute_nodes:
        node = compute_nodes[node_name]
        input_node_names = node.input_nodes
        node.input_nodes = []
        input_nodes = [compute_nodes[name] for name in input_node_names]
        for in_node in input_nodes:
            node.input_nodes.append(in_node)
            in_node.output_nodes.append(node)
        graph.append(node)
    print(optimization_info)
    return graph, optimization_info
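# Hedged example of the config.yaml shape that load_graph() parses. The section and field
# names (pipeline, modules, node, script, dependencies, params, input_nodes, optimize)
# come from the parsing code above; the concrete values, and anything about the params
# schema beyond name/type, are made up for illustration.
_EXAMPLE_CONFIG_YAML = """
pipeline:
  project_name: demo_project
  user_name: demo_user
  admin_token: <admin-token>

modules:
  - node: preprocess
    script: scripts/preprocess.py
    dependencies:
      - data/

  - node: train
    script: scripts/train.py
    input_nodes:
      - preprocess
    params:
      - name: lr
        type: float
    optimize:
      - search: grid
        metric: accuracy
        direction: max
"""
# yaml.safe_load(_EXAMPLE_CONFIG_YAML)['modules'][1]['node'] -> 'train'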
    print(e)
    sys.exit(1)

publisher = Publisher(job_id, user_id, host=redis_host, port=redis_port, pwd=redis_pwd)

with cd(data_lake):
    publisher.progress("Downloading")
    FileSet.download_file_set(input_file_set, ".", force=True)

    # Download and unzip code
    code_path = "./" + code
    File.download({code: code_path})
    with zipfile.ZipFile(code_path, "r") as ref:
        ref.extractall()

    # Run user code
    publisher.progress("Running")
    log_publisher = subprocess.Popen(
        [
            "python",
            "../job-agent/log_publisher.py",
            job_id,
            user_id,
            redis_host,
            redis_port,
            redis_pwd,
def run_workflow_optim(self):
    last_rst = None
    hps = None
    best_rst = None
    best_rsts = []
    no_improve_count = 0
    max_no_improve_count = 10
    count = 1
    # Run workflow
    while no_improve_count < max_no_improve_count:
        print(colored("Starting round {} of hyper parameter search...".format(count), 'blue'))
        hps = self.searcher.get_next_hps(hps, last_rst)
        # Reset in-degree for each node
        for node in self.graph:
            node.input_nodes_num = len(node.input_nodes)
        # Execute nodes with zero indegree
        q = []
        for node in self.graph:
            if node.input_nodes_num == 0:
                q.append(node)
        # Count the number of executed nodes
        exec_count = 0
        # Keep looping until all nodes are executed
        while exec_count < len(self.graph):
            # Constantly check if new nodes are added to the queue
            while not q:
                time.sleep(SLEEP_INTERVAL)
            # Submit current node for execution in a new thread
            run_node = Thread(target=self.submit_node_optim, args=(q.pop(0), q, hps))
            run_node.start()
            exec_count += 1
        # Wait for the last submitted node to finish before collecting the result
        run_node.join()
        # Download latest result
        result_node = self.optim_info['result_node']
        result_node_name = result_node.node_name
        if self.local:
            result_path = "{}/{}:{}/{}_output/{}.pkl".format(
                MOCK_PATH, result_node_name, result_node.last_ver,
                result_node_name, result_node_name)
        else:
            result_path = "tmp_{}.pkl".format(result_node_name)
            File.download({"{}_output/{}.pkl".format(result_node_name, result_node_name): result_path})
        # Get target metric value
        result = pkl.load(open(result_path, "rb"))
        last_rst = result[self.optim_info['metric']]
        assert isinstance(last_rst, (int, float))
        if self.optim_info['direction'] == 'min':
            last_rst = -last_rst
        if not self.local and os.path.exists(result_path):
            os.remove(result_path)
        # Update best result
        if best_rst is None or last_rst > best_rst:
            best_rst = last_rst
            no_improve_count = 0
            print(colored("New best result! {}:{}".format(self.optim_info['metric'], best_rst), 'blue'))
        else:
            no_improve_count += 1
            print(colored("No improvement in {} consecutive searches".format(no_improve_count), 'blue'))
        count += 1
        best_rsts.append(best_rst)
    # Draw performance curve
    output_path = self.workspace + "/" + OUTPUT_PATH
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    xind = np.arange(1, len(best_rsts) + 1)
    plt.plot(xind, best_rsts, 'b', marker='^')
    plt.title('Performance curve with search iterations')
    plt.xlabel('# of search iterations')
    plt.ylabel(self.optim_info['metric'])
    plt.savefig('{}/{}_curve.pdf'.format(output_path, self.optim_info['search']))
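# Hedged sketch of the searcher contract run_workflow_optim() relies on: get_next_hps()
# receives the previous hyper parameter dict and its (sign-adjusted) metric value and
# returns the next candidate. This trivial stand-in cycles through a fixed candidate list;
# the real grid/Bayesian searchers are not shown here and this class is hypothetical.
class _FixedSearcher:
    def __init__(self, candidates):
        self._candidates = list(candidates)
        self._i = 0

    def get_next_hps(self, last_hps, last_rst):
        hps = self._candidates[self._i % len(self._candidates)]
        self._i += 1
        return hps

# searcher = _FixedSearcher([{"lr": 0.1}, {"lr": 0.01}, {"lr": 0.001}])
# searcher.get_next_hps(None, None) -> {"lr": 0.1}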
# # In[36]:

# # @evaluation
# # you can inspect your token in multiple ways
# print(r['user_token'])
# print(os.environ['ACAI_TOKEN'])
# print(credentials.get_credentials())
credentials.login('bWEPDTmZIFiSJbYw15FXevi0FZ3bp2Tk')

# Upload code
code = os.path.join(workspace, 'wordcount.zip')
File.upload({code: '/wordcount.zip'})

# Upload input files and create file set
input_dir = os.path.join(workspace, 'Shakespeare')
File.convert_to_file_mapping([input_dir], 'Shakespeare/').files_to_upload.upload().as_new_file_set('shakespeare.works')

# Run a job
job_setting = {
    "v_cpu": "0.2",
    "memory": "256Mi",
    "gpu": "0",
    "command": "mkdir -p ./my_output/ && (cat Shakespeare/* | python3 wordcount.py ./my_output/)",
    "container_image": "pytorch/pytorch",
    'input_file_set': 'shakespeare.works',
    'output_path': './my_output/',
    'code': '/wordcount.zip',