def training(job_name, package_path, image_tag, parameters, num_gpu, user_args):
    """Create a training job through the Myelindl API and start it.

    The PROJECT environment variable selects the project the job is created
    in; when it is unset a hint is printed and nothing is submitted.

    Args:
        job_name: Name passed to ``job_create`` and used in the final message.
        package_path: Optional package location.  ``~`` is expanded.  A value
            that does not exist locally is accepted only if it starts with
            ``ssh`` or ``http``; an existing local path is copied with
            ``mc cp --recursive`` to ``bk/<job id>`` after the job is created.
        image_tag: Image tag passed through to ``job_create``.
        parameters: Optional path of a parameter file.  The file's content
            (not its path) is what gets sent to ``job_create``.
        num_gpu: GPU count passed through to ``job_create``.
        user_args: Iterable of strings, joined with single spaces and sent as
            ``user_args``.

    Returns:
        None.  Progress and validation errors are reported via ``click.echo``.

    Raises:
        MyelindlApiError: Re-raised when the API fails.  If the job had
            already been created, its deletion is attempted first.
    """
    job_id = None

    project = os.getenv('PROJECT', None)
    if project is None:
        click.echo(
            "environment variable PROJECT not defined!, use export PROJECT=<project name> before training."
        )
        return

    if package_path:
        package_path = os.path.expanduser(package_path)
        # NOTE(review): this check accepts any "ssh" prefix while the upload
        # step below only skips "ssh://" -- confirm which one is intended.
        if (not os.path.exists(package_path)
                and not package_path.startswith(("ssh", "http"))):
            click.echo(
                '--package-path: {} not exists!'.format(package_path))
            return

    if parameters:
        if not os.path.exists(parameters):
            click.echo('parameter file: {} not exists'.format(parameters))
            # Stop here: falling through would crash in open() below.
            return
        with open(parameters, 'r') as pfile:
            parameters = pfile.read()

    try:
        api = MyelindlApi()
        result = api.job_create(
            project=project,
            image_tag=image_tag,
            job_name=job_name,
            pkg_path=package_path,
            parameters=parameters,
            num_gpu=num_gpu,
            user_args=' '.join(user_args),
        )
        job_id = result['id']

        # Remote packages (ssh:// or http) are not copied from this machine.
        if package_path and not package_path.startswith(("ssh://", "http")):
            bk_path = os.path.join("bk", job_id)
            # Argument list instead of a shell string: the user-supplied path
            # is never interpreted by a shell and may contain spaces.
            try:
                status = subprocess.call(
                    ["mc", "cp", "--recursive", package_path, bk_path])
            except OSError as err:
                # Without a shell a missing `mc` binary raises instead of
                # returning 127; keep the copy best-effort as before.
                click.echo('warning: unable to run mc: {}'.format(err))
            else:
                if status != 0:
                    click.echo(
                        'warning: mc cp exited with status {}'.format(status))

        click.echo('Job id: {}'.format(job_id))
        result = api.job_train(job_id)
        click.echo('Job {}: {}'.format(job_name, result))
    except MyelindlApiError as e:
        # Do not leave a created-but-never-started job behind.
        if job_id:
            api.job_delete(job_id)
        click.echo("submit failed, %s" % str(e))
        raise
def delete(job_id):
    """Delete the job ``job_id`` through the Myelindl API.

    Prints a confirmation on success.  On any failure the error is printed
    and the original exception is re-raised to the caller.

    Args:
        job_id: Identifier passed to ``MyelindlApi.job_delete``.

    Raises:
        Exception: Whatever the API client (or the confirmation echo) raised.
    """
    try:
        api = MyelindlApi()
        api.job_delete(job_id)
        click.echo('Job {} deleted '.format(job_id))
    except Exception as e:
        # `except X as e` is valid on Python 2.6+ and 3; the old comma form
        # is a SyntaxError on Python 3.
        click.echo("failed, {}".format(e))
        raise