Example #1
    def process(self):
        paths = self.args.file_paths
        # Allow additional input paths to be piped in via stdin ('-')
        if '-' in paths[:-1]:
            paths.remove('-')
            more_inputs = [l.strip() for l in sys.stdin if l.strip()]
            paths = more_inputs + paths

        l_r_mapping, ignored_paths = \
            File.convert_to_file_mapping(paths[:-1], paths[-1])

        if self.args.dry_run:
            print_info('[DRY RUN] Files to be uploaded:')
            for l, r in l_r_mapping:
                print('{} => {}'.format(l, r))
            if ignored_paths:
                print_warn('[DRY RUN] Files to be ignored (inaccessible):')
                for l in ignored_paths:
                    print(l)
        else:
            result = File.upload(l_r_mapping)
            if self.args.fileset:
                r = result.as_new_file_set(self.args.fileset)
                print(r)
            print("results:")
            print(result)
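A usage sketch of the path convention process() assumes (the command name and file names here are hypothetical): the final positional argument is the remote destination, and a literal '-' among the source paths pulls additional paths from standard input.

# Hypothetical invocation of the command backed by process() above:
#   find data -name '*.csv' | <cli> upload - notes.txt /datasets/
# Paths read from stdin are prepended to the explicit sources, the '-'
# placeholder is dropped, and the last argument ('/datasets/') is passed to
# File.convert_to_file_mapping() as the upload destination.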
    def retrieve_result(self, node):
        # Make a temp dir for the downloaded results
        tmp_dir = "result_tmp"
        if not os.path.exists(tmp_dir):
            os.mkdir(tmp_dir)
        # Download the result of every recorded version of this node
        node_name = node.node_name
        results = {}
        for ver in self.node_versions[node_name]:
            if self.local:
                local_path = "{}/{}:{}/{}_output/{}.pkl".format(
                    MOCK_PATH, node_name, ver, node_name, node_name)
            else:
                remote_path = "{}_output/{}.pkl:{}".format(node_name, node_name, ver)
                local_path = "{}/{}.pkl".format(tmp_dir, node_name)
                File.download({remote_path: local_path})
            with open(local_path, "rb") as f:
                rst = pkl.load(f)
            # Keep only numeric / list / dict metrics; iterate over a copy of
            # the keys so entries can be removed while looping
            for metric in list(rst):
                if not isinstance(rst[metric], (int, float, list, dict)):
                    rst.pop(metric, None)
            results[ver] = rst
        self.log_manager.save_result(node_name, results, self.workspace)
        # Clean up the temp dir
        if os.path.exists(tmp_dir):
            import shutil
            shutil.rmtree(tmp_dir)
    def build_scripts(self, node):
        node_name = node.node_name
        script_path = node.script_path[:-3]
        script_name = script_path.split('/')[-1]
        local_generated_script_name = "{}_{}.py".format(self.workspace, node_name)
        cloud_generated_script_name = "_{}.py".format(node_name)

        if os.path.exists(local_generated_script_name):
            return
        fs = open(local_generated_script_name, "w")

        # Import the user function and the modules the generated script needs
        fs.write("from {} import {}\n".format('.'.join(script_path.split('/')), script_name))
        fs.write("import argparse\n")
        fs.write("import pickle as pkl\n")
        fs.write("import os\n")

        # Build argument parser
        fs.write("parser=argparse.ArgumentParser()\n")
        for hp in node.hyper_parameter:
            fs.write("parser.add_argument('--{}', type={})\n".format(hp['name'],
                     'float' if hp['type'] == 'float' else 'str'))
        fs.write("args=parser.parse_args()\n")
        # Get input data and hyper parameters
        fs.write("inputs=dict()\n")
        for in_node in node.input_nodes:
            fs.write("inputs['{}']=pkl.load(open('{}_output/{}.pkl', 'rb'))\n"
                     .format(in_node.node_name, in_node.node_name, in_node.node_name))
        fs.write("hps=dict()\n")
        for hp in node.hyper_parameter:
            fs.write("hps['{}']=args.{}\n".format(hp['name'], hp['name']))
        # Call function
        fs.write("rst={}(inputs, hps)\n".format(script_name))
        # Save the result
        fs.write("if not os.path.exists('{}_output'):\n".format(node_name))
        fs.write("\tos.mkdir('{}_output')\n".format(node_name))
        fs.write("pkl.dump(rst, open('{}_output/{}.pkl', 'wb'))\n".format(node_name, node_name))
        # Compress the script and submit to ACAI system
        fs.close()

        if self.local:
            return
        # zip generated script and upload
        local_zip_filename = "{}_{}.zip".format(self.workspace, node_name)
        cloud_zip_filename = "_{}.zip".format(node_name)
        print(local_generated_script_name, cloud_generated_script_name, local_zip_filename, cloud_zip_filename)
        with ZipFile(local_zip_filename, "w") as zipf:
            zipf.write(local_generated_script_name, cloud_generated_script_name)
        File.upload([(local_zip_filename, cloud_zip_filename)])
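To make the code generation concrete, here is roughly the wrapper that build_scripts would write for a hypothetical node named train whose script lives at steps/train.py, with one input node prep and one float hyperparameter lr (all of these names are illustrative, not taken from the source):

from steps.train import train
import argparse
import pickle as pkl
import os
parser=argparse.ArgumentParser()
parser.add_argument('--lr', type=float)
args=parser.parse_args()
inputs=dict()
inputs['prep']=pkl.load(open('prep_output/prep.pkl', 'rb'))
hps=dict()
hps['lr']=args.lr
rst=train(inputs, hps)
if not os.path.exists('train_output'):
    os.mkdir('train_output')
pkl.dump(rst, open('train_output/train.pkl', 'wb'))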
Example #4
    def process(self):
        if self.args.action == 'ls':
            if not self.args.directory:
                self.args.directory = '/'
            r = File.list_dir(self.args.directory)
            for d in r:
                path = d['path']
                if d['is_dir']:
                    path += '/'
                print(path)
        elif self.args.action == 'versions':
            r = File.list_file_versions(self.args.file)
            for path in r:
                print(path)
        elif self.args.action == 'tag':
            MetaCommand.tag(MetaCommand.EntityType.FILE, self.args)
        elif self.args.action == 'untag':
            MetaCommand.untag(MetaCommand.EntityType.FILE, self.args)
        elif self.args.action == 'find':
            MetaCommand.find(MetaCommand.EntityType.FILE, self.args)
Example #5
    def list_dir(dir_path, with_meta: bool):
        if not dir_path:
            # Nothing given, list root directory.
            dir_path = '/'
        r = File.list_dir(dir_path)
        explicit_paths = []

        for d in r:
            if d['is_dir']:
                path = d['path'] + '/'
                explicit_paths.append(path)
            else:
                versioned_path = ':'.join([d['path'], str(d['version'])])
                if dir_path == '/':
                    dir_path = ''
                explicit_paths.append(os.path.join(dir_path, versioned_path))

        ListCommand._print_files(dir_path, explicit_paths, with_meta=with_meta)
Example #6
    def load_graph(self):
        # Read config YAML file
        with open(self.workspace + "config.yaml", 'r') as stream:
            config = yaml.safe_load(stream)

        # read version history from disk
        history = {}
        if self.is_exist('_history.yaml'):
            with open(self.workspace + "_history.yaml", 'r') as stream:
                history = yaml.safe_load(stream)

        # read credentials
        project_name = config['pipeline']['project_name']
        user_name = config['pipeline']['user_name']
        admin_token = config['pipeline']['admin_token']

        # create project / user if this is first time
        if not history:
            r = Project.create_project(project_name, admin_token, 'proj_admin')
            r = Project.create_user(project_name, r['project_admin_token'],
                                    user_name)
            history['credentials'] = {
                'ACAI_PROJECT': project_name,
                'ACAI_TOKEN': r['user_token']
            }

        # update environment var with credentials
        for key in history['credentials']:
            os.environ[key] = history['credentials'][key]

        # get from history: last snapshot of workspace: file modified times
        file_versions = {}
        if 'file_versions' in history:
            file_versions = history['file_versions']
            # {filepath: {"time": latest modified time, "version": version}} dict

        # get current file modified times
        # figure out set of modified files
        # update versions map
        curr_files = self.list_all_file_paths(self.workspace)
        modified = set()
        for curr_file in curr_files:
            curr_mod_time = self.get_modified_time(curr_file)
            if (curr_file not in file_versions
                    or curr_mod_time != file_versions[curr_file]['time']):
                # curr_file is modified
                modified.add(curr_file)
                # update versions map
                curr_version = (0 if curr_file not in file_versions
                                else file_versions[curr_file]['version'] + 1)
                file_versions[curr_file] = {
                    "time": curr_mod_time,
                    "version": curr_version
                }

        # figure out set of needed files for current run
        needed = set()
        for module in config['modules']:
            script_path = module['script']
            needed.add(script_path)
            dependencies = module.get('dependencies', {})
            for path in dependencies:
                for needed_file in self.list_all_file_paths(path):
                    needed.add(needed_file)

        # upload any file that is needed for the current run and has been modified since the last snapshot
        if not self.local_mode:
            for needed_file in needed:
                if needed_file in modified:
                    input_dir = os.path.join(self.workspace, needed_file)
                    print("uploading: " + needed_file)
                    File.upload([(input_dir, needed_file)],
                                []).as_new_file_set(needed_file)

        # load script versions to prepare for subsequent stage of generating "compute nodes"
        # note that a script version is dependent on both script file version and dependent file versions
        script_versions = {}
        if 'script_versions' in history:
            script_versions = history['script_versions']
            # {node_name: {"version": version, "script_path": script_path}}

        # init optimizer info in case this is an optimization job
        optimization_info = {
            "result_node": None,  # pointer to result node in computation graph
            "search": "grid",  # "grid" or "bayesian"
            "metric": "",  # name of the optimization metric
            "direction":
            "max",  # "max" or "min", direction for metric optimization
        }

        # create compute nodes
        compute_nodes = {}
        uploaded = set()
        for module in config['modules']:
            node_name = module['node']
            script_path = module['script']

            # compute script version:
            if node_name not in script_versions:
                script_versions[node_name] = {
                    "version": 0,
                    "script_path": script_path
                }
            else:
                print(script_versions)
                past_version = script_versions[node_name]["version"]
                past_script = script_versions[node_name]["script_path"]
                if script_path != past_script:
                    script_versions[node_name] = {
                        "version": past_version + 1,
                        "script_path": script_path
                    }
                else:
                    # get all needed files (one script file + optional data files) for this node
                    needed_files = [script_path]
                    dependencies = module.get('dependencies', {})
                    for path in dependencies:
                        needed_files += self.list_all_file_paths(path)

                    # if any of the needed files is in modified set, increment script version by 1
                    for needed_file in needed_files:
                        if needed_file in modified:
                            script_versions[node_name]["version"] += 1
                            break

            # continue building the new node
            hyperparams = module.get('params', {})
            input_nodes = module.get('input_nodes', [])
            newnode = Node(
                node_name=node_name,
                script_path=script_path,
                script_version=script_versions[node_name]["version"],
                input_nodes=input_nodes,
                output_nodes=[],
                dependencies=module.get('dependencies', []),
                hyperparams=hyperparams,
                isResult=('optimize' in module))
            compute_nodes[node_name] = newnode

            # check if this node is the one to optimize
            if 'optimize' in module:
                opt = module['optimize'][0]
                optimization_info['result_node'] = newnode
                optimization_info["search"] = opt['search'].lower()
                optimization_info["metric"] = opt['metric']
                optimization_info["direction"] = opt['direction'].lower()

        # save new history dict to disk
        history['script_versions'] = script_versions
        history['file_versions'] = file_versions
        with io.open(self.workspace + '_history.yaml', 'w+',
                     encoding='utf8') as outfile:
            yaml.dump(history,
                      outfile,
                      default_flow_style=False,
                      allow_unicode=True)

        # second pass to connect in/out edges between nodes
        graph = []
        for node_name in compute_nodes:
            node = compute_nodes[node_name]
            input_node_names = node.input_nodes
            node.input_nodes = []
            input_nodes = [compute_nodes[name] for name in input_node_names]
            for in_node in input_nodes:
                node.input_nodes.append(in_node)
                in_node.output_nodes.append(node)
            graph.append(node)

        print(optimization_info)
        return graph, optimization_info
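For reference, load_graph expects yaml.safe_load to turn config.yaml into a structure roughly like the sketch below. Only the keys read above are shown, and every value is a made-up placeholder:

config = {
    'pipeline': {
        'project_name': 'demo_project',   # placeholder
        'user_name': 'demo_user',         # placeholder
        'admin_token': 'ADMIN_TOKEN',     # placeholder
    },
    'modules': [
        {
            'node': 'prep',                # node name
            'script': 'steps/prep.py',     # script path inside the workspace
        },
        {
            'node': 'train',
            'script': 'steps/train.py',
            'dependencies': ['data/'],     # optional: extra files/dirs the node needs
            'params': [{'name': 'lr', 'type': 'float'}],  # optional hyperparameters
            'input_nodes': ['prep'],       # optional upstream nodes
            # optional: marks this node as the optimization target
            'optimize': [{'search': 'grid', 'metric': 'accuracy', 'direction': 'max'}],
        },
    ],
}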
Example #7
        print(e)
        sys.exit(1)

    publisher = Publisher(job_id,
                          user_id,
                          host=redis_host,
                          port=redis_port,
                          pwd=redis_pwd)

    with cd(data_lake):
        publisher.progress("Downloading")
        FileSet.download_file_set(input_file_set, ".", force=True)

        # Download and unzip code
        code_path = "./" + code
        File.download({code: code_path})
        with zipfile.ZipFile(code_path, "r") as ref:
            ref.extractall()

        # Run user code
        publisher.progress("Running")

        log_publisher = subprocess.Popen(
            [
                "python",
                "../job-agent/log_publisher.py",
                job_id,
                user_id,
                redis_host,
                redis_port,
                redis_pwd,
    def run_workflow_optim(self):
        last_rst = None
        hps = None
        best_rst = None
        best_rsts = []
        no_improve_count = 0
        max_no_improve_count = 10
        count = 1
        # Run the workflow until the metric stops improving
        while no_improve_count < max_no_improve_count:
            print(colored("Starting round {} of hyper parameter search...".format(count), 'blue'))
            hps = self.searcher.get_next_hps(hps, last_rst)
            # Reset in-degree for each node
            for node in self.graph:
                node.input_nodes_num = len(node.input_nodes)
            # Start with the nodes that have zero in-degree
            q = []
            for node in self.graph:
                if node.input_nodes_num == 0:
                    q.append(node)
            # Count the number of submitted nodes
            exec_count = 0
            worker_threads = []
            # Keep looping until all nodes have been submitted
            while exec_count < len(self.graph):
                # Wait for new ready nodes to be added to the queue
                while not q:
                    time.sleep(SLEEP_INTERVAL)
                # Submit the next ready node for execution in a new thread
                run_node = Thread(target=self.submit_node_optim, args=(q.pop(0), q, hps))
                run_node.start()
                worker_threads.append(run_node)
                exec_count += 1
            # Wait for every submitted node to finish, not just the last one
            for t in worker_threads:
                t.join()
            # Download the latest result
            result_node = self.optim_info['result_node']
            result_node_name = result_node.node_name
            if self.local:
                result_path = "{}/{}:{}/{}_output/{}.pkl".format(
                    MOCK_PATH, result_node_name, result_node.last_ver, result_node_name, result_node_name)
            else:
                result_path = "tmp_{}.pkl".format(result_node_name)
                File.download({"{}_output/{}.pkl".format(result_node_name, result_node_name): result_path})
            # Get the target metric value
            with open(result_path, "rb") as f:
                result = pkl.load(f)
            last_rst = result[self.optim_info['metric']]
            assert isinstance(last_rst, (int, float))
            # Negate the metric so minimization can be handled as maximization
            if self.optim_info['direction'] == 'min':
                last_rst = -last_rst
            if not self.local and os.path.exists(result_path):
                os.remove(result_path)
            # Update the best result seen so far
            if best_rst is None or last_rst > best_rst:
                best_rst = last_rst
                no_improve_count = 0
                print(colored("New best result! {}:{}".format(self.optim_info['metric'], best_rst), 'blue'))
            else:
                no_improve_count += 1
                print(colored("No improvement in {} consecutive searches".format(no_improve_count), 'blue'))
            count += 1
            best_rsts.append(best_rst)
        # Draw the performance curve
        output_path = self.workspace + "/" + OUTPUT_PATH
        if not os.path.exists(output_path):
            os.mkdir(output_path)
        xind = np.arange(1, len(best_rsts) + 1)
        plt.plot(xind, best_rsts, 'b', marker='^')
        plt.title('Performance curve with search iterations')
        plt.xlabel('# of search iterations')
        plt.ylabel(self.optim_info['metric'])
        plt.savefig('{}/{}_curve.pdf'.format(output_path, self.optim_info['search']))
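The loop above only relies on the searcher exposing a get_next_hps(prev_hps, prev_result) method that returns the next hyperparameter assignment as a dict. A minimal random-search stand-in (purely illustrative, not the actual ACAI searcher) could look like this:

import random

class RandomSearcher:
    """Illustrative stand-in for self.searcher: ignores the previous result
    and samples a fresh assignment from a fixed search space."""

    def __init__(self, space):
        # space maps each hyperparameter name to its candidate values,
        # e.g. {'lr': [0.1, 0.01, 0.001]}
        self.space = space

    def get_next_hps(self, prev_hps, prev_result):
        return {name: random.choice(values) for name, values in self.space.items()}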
Example #9
# # In[36]:


# # @evaluation
# # you can inspect your token in multiple ways
# print(r['user_token'])
# print(os.environ['ACAI_TOKEN'])
# print(credentials.get_credentials())



credentials.login('bWEPDTmZIFiSJbYw15FXevi0FZ3bp2Tk')

# Upload code
code = os.path.join(workspace, 'wordcount.zip')
File.upload({code: '/wordcount.zip'})

# Upload input files and create file set
input_dir = os.path.join(workspace, 'Shakespeare')
File.convert_to_file_mapping([input_dir], 'Shakespeare/').files_to_upload.upload().as_new_file_set('shakespeare.works')

# Run a job
job_setting = {
    "v_cpu": "0.2",
    "memory": "256Mi",
    "gpu": "0",
    "command": "mkdir -p ./my_output/ && (cat Shakespeare/* | python3 wordcount.py ./my_output/)",
    "container_image": "pytorch/pytorch",
    'input_file_set': 'shakespeare.works',
    'output_path': './my_output/',
    'code': '/wordcount.zip',