def train(dir): """ Command to train ML model(s) locally """ logger.info(ASCII_LOGO) logger.info("Started local training...\n") sagify_module_path = os.path.join(dir, 'sagify') local_train_script_path = os.path.join(sagify_module_path, 'local_test', 'train_local.sh') test_path = os.path.join(sagify_module_path, 'local_test', 'test_dir') if not os.path.isdir(test_path): logger.info("This is not a sagify directory: {}".format(dir)) sys.exit(-1) try: subprocess.check_output([ "{}".format(local_train_script_path), "{}".format(os.path.abspath(test_path)) ]) logger.info("Local training completed successfully!") except Exception as e: logger.info("{}".format(e)) return
def _run_test_script(self, filename='mytestscript.py', interpreter=sys.executable): # Absolute file path: fn = self.tempdir + filename try: output = check_output([interpreter, fn], env=self.env, stderr=STDOUT) except CalledProcessError as e: with open(fn) as f: msg = ('Error running the command %s\n' '%s\n' 'Contents of file %s:\n' '\n' '%s') % ( ' '.join([interpreter, fn]), 'env=%s' % self.env, fn, '----\n%s\n----' % f.read(), ) if not hasattr(e, 'output'): # The attribute CalledProcessError.output doesn't exist on Py2.6 e.output = None raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) return output
def build(dir, requirements_dir, docker_tag): """ Builds a Docker image that contains code under the given source root directory. Assumes that Docker is installed and running locally. :param dir: [str], source root directory :param requirements_dir: [str], path to requirements.txt """ sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/')) build_script_path = os.path.join(sagify_module_path, 'build.sh') dockerfile_path = os.path.join(sagify_module_path, 'Dockerfile') train_file_path = os.path.join(sagify_module_path, 'training', 'train') serve_file_path = os.path.join(sagify_module_path, 'prediction', 'serve') executor_file_path = os.path.join(sagify_module_path, 'executor.sh') if not os.path.isfile(build_script_path) or not os.path.isfile(train_file_path) or not \ os.path.isfile(serve_file_path): raise ValueError("This is not a sagify directory: {}".format(dir)) os.chmod(train_file_path, 0o777) os.chmod(serve_file_path, 0o777) os.chmod(executor_file_path, 0o777) target_dir_name = os.path.basename(os.path.normpath(dir)) output = subprocess.check_output([ "{}".format(build_script_path), "{}".format(os.path.relpath(dir)), "{}".format(os.path.relpath(target_dir_name)), "{}".format(dockerfile_path), "{}".format(os.path.relpath(requirements_dir)), docker_tag ]) logger.debug(output)
def push(dir, docker_tag, aws_region, iam_role_arn, aws_profile, external_id, image_name): """ Push Docker image to AWS ECS :param dir: [str], source root directory :param docker_tag: [str], the Docker tag for the image :param aws_region: [str], the AWS region to push the image to :param iam_role_arn: [str], the AWS role used to push the image to ECR :param aws_profile: [str], the AWS profile used to push the image to ECR :param external_id: [str], Optional external id used when using an IAM role :param image_name: [str], The name of the Docker image """ sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/')) push_script_path = os.path.join(sagify_module_path, 'push.sh') if not os.path.isfile(push_script_path): raise ValueError("This is not a sagify directory: {}".format(dir)) output = subprocess.check_output([ "{}".format(push_script_path), docker_tag, aws_region, iam_role_arn, aws_profile, external_id, image_name ]) logger.debug(output)
def _run_test_script(self, filename='mytestscript.py', interpreter=sys.executable): # Absolute file path: fn = self.tempdir + filename try: output = check_output([interpreter, fn], env=self.env, stderr=STDOUT) except CalledProcessError as e: with open(fn) as f: msg = ( 'Error running the command %s\n' '%s\n' 'Contents of file %s:\n' '\n' '%s') % ( ' '.join([interpreter, fn]), 'env=%s' % self.env, fn, '----\n%s\n----' % f.read(), ) if not hasattr(e, 'output'): # The attribute CalledProcessError.output doesn't exist on Py2.6 e.output = None raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) return output
def download_apk(storage, handle, tmp_dir, apk_name, apk_tmp): """ Download the APK from Google Play for the given handle. :param storage: minio storage helper :param handle: application handle to download :param tmp_dir: download destination directory :param apk_name: name of the APK in Minio storage :param apk_tmp: apk temporary name :return: True if succeed, False otherwise """ DEVICE_CODE_NAMES = [ 'bacon', 'hammerhead', 'manta', 'cloudbook', 'bullhead' ] RETRY_PER_DEVICE = 3 refreshed_token = False for device in DEVICE_CODE_NAMES: cmd = 'gplaycli -v -a -y -pd {} -dc {} -f {}/'.format( handle, device, tmp_dir) for i in range(RETRY_PER_DEVICE): try: output = subprocess.check_output( shlex.split(cmd), stderr=subprocess.STDOUT, timeout=240 # Timeout of 4 minutes ) output_str = output.decode('utf-8') logging.info(output_str) if '[ERROR]' in output_str: filtered = output_str.replace( '[ERROR] cache file does not exists or is corrupted', '') if '[ERROR]' in filtered: raise RuntimeError('Error while downloading apk file') apk = Path(apk_tmp) if apk.is_file(): try: storage.put_file(apk_tmp, apk_name) return True except ResponseError as err: logging.info(err) return False except TimeoutExpired: logging.warning("Timeout of gplaycli download command") return False except Exception as e: logging.info(e) logging.info("Could not download with device {}".format(device)) if not refreshed_token: remove_token() refreshed_token = True time.sleep(2) return False
def build(dir, requirements_dir): """ Command to build SageMaker app """ logger.info(ASCII_LOGO) logger.info( "Started building SageMaker Docker image. It will take some minutes...\n" ) sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/')) build_script_path = os.path.join(sagify_module_path, 'build.sh') dockerfile_path = os.path.join(sagify_module_path, 'Dockerfile') train_file_path = os.path.join(sagify_module_path, 'training', 'train') serve_file_path = os.path.join(sagify_module_path, 'prediction', 'serve') executor_file_path = os.path.join(sagify_module_path, 'executor.sh') if not os.path.isfile(build_script_path) or not os.path.isfile(train_file_path) or not \ os.path.isfile(serve_file_path): logger.info("This is not a sagify directory: {}".format(dir)) sys.exit(-1) os.chmod(train_file_path, 0o777) os.chmod(serve_file_path, 0o777) os.chmod(executor_file_path, 0o777) target_dir_name = os.path.basename(os.path.normpath(dir)) try: subprocess.check_output([ "{}".format(build_script_path), "{}".format(os.path.relpath(dir)), "{}".format(os.path.relpath(target_dir_name)), "{}".format(dockerfile_path), "{}".format(os.path.relpath(requirements_dir)) ]) logger.info("Docker image built successfully!") except Exception as e: logger.info("{}".format(e)) return
def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
                          all_imports=False, from3=False,
                          conservative=False):
    """Run futurize.py (or pasteurize.py when *from3*) in-place on *filename*.

    Returns the tool's combined stdout/stderr; on failure raises
    FuturizeError or PasteurizeError with the file's contents attached.
    """
    stages = list(stages)
    script = 'pasteurize.py' if from3 else 'futurize.py'
    params = []
    if all_imports:
        params.append('--all-imports')
    if stages == [1]:
        params.append('--stage1')
    elif stages == [2]:
        params.append('--stage2')
    else:
        assert stages == [1, 2]
    if conservative:
        params.append('--conservative')
    # No extra params needed

    # Absolute file path:
    fn = self.tempdir + filename
    call_args = [sys.executable, script] + params + ['-w', fn]
    try:
        return check_output(call_args, stderr=STDOUT, env=self.env)
    except CalledProcessError as e:
        with open(fn) as f:
            contents = f.read()
        msg = ('Error running the command %s\n'
               '%s\n'
               'Contents of file %s:\n'
               '\n'
               '%s') % (' '.join(call_args),
                        'env=%s' % self.env,
                        fn,
                        '----\n%s\n----' % contents)
        ErrorClass = FuturizeError if 'futurize' in script else PasteurizeError
        if not hasattr(e, 'output'):
            # The attribute CalledProcessError.output doesn't exist on Py2.6
            e.output = None
        raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
def push(dir): """ Command to push Docker image to AWS ECS """ logger.info(ASCII_LOGO) logger.info( "Started pushing Docker image to AWS ECS. It will take some time. Please, be patient...\n" ) sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/')) push_script_path = os.path.join(sagify_module_path, 'push.sh') if not os.path.isfile(push_script_path): logger.info("This is not a sagify directory: {}".format(dir)) sys.exit(-1) try: subprocess.check_output(["{}".format(push_script_path)]) logger.info("Docker image pushed to ECS successfully!") except Exception as e: logger.info("{}".format(e)) return
def push(dir, docker_tag): """ Push Docker image to AWS ECS :param dir: [str], source root directory """ sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/')) push_script_path = os.path.join(sagify_module_path, 'push.sh') if not os.path.isfile(push_script_path): raise ValueError("This is not a sagify directory: {}".format(dir)) output = subprocess.check_output(["{}".format(push_script_path), docker_tag]) logger.debug(output)
def deploy(dir): """ Command to deploy ML model(s) locally """ logger.info(ASCII_LOGO) logger.info("Started local deployment at localhost:8080 ...\n") sagify_module_path = os.path.join(dir, 'sagify') local_deploy_script_path = os.path.join(sagify_module_path, 'local_test', 'deploy_local.sh') test_path = os.path.join(sagify_module_path, 'local_test', 'test_dir') if not os.path.isdir(test_path): logger.info("This is not a sagify directory: {}".format(dir)) sys.exit(-1) try: subprocess.check_output([ "{}".format(local_deploy_script_path), "{}".format(os.path.abspath(test_path)) ]) except Exception as e: logger.info("{}".format(e)) return
def load_kubeconf(config_file):
    """
    Load a kubeconfig file, resolve the 'aws' user's exec-based credentials
    (e.g. aws-iam-authenticator) into a static bearer token, and install the
    result as the default Kubernetes client configuration.

    :param config_file: [str], path to the kubeconfig YAML file
    :raises IndexError: if no user named 'aws' exists in the kubeconfig
    """
    with open(config_file, 'r') as f:
        # SECURITY FIX: yaml.load() without an explicit Loader is deprecated
        # and can construct arbitrary Python objects from the file;
        # safe_load restricts parsing to plain data types.
        config = yaml.safe_load(f)

    user = [user for user in config['users'] if user['name'] == 'aws'][0]['user']

    # Run the configured exec plugin to obtain a short-lived token.
    command = [user['exec']['command']]
    command.extend(user['exec']['args'])
    output = subprocess.check_output(command)
    c = json.loads(output.decode('utf-8'))

    # Replace the exec section with the resolved static token so the
    # client library does not try to run the plugin itself.
    user['token'] = c['status']['token']
    del user['exec']

    loader = KubeConfigLoader(config)
    # type.__call__ instantiates Configuration while bypassing any custom
    # metaclass __call__ it may define.
    config = type.__call__(Configuration)
    loader.load_and_set(config)
    Configuration.set_default(config)
def test_kafka_build_and_inject(tmpdir):
    """Build the kafka defaults, inject env vars, and check the rendered config."""
    dest = test_kafka_build(tmpdir)
    cmd = [
        'env',
        'ENV_INJECT=*.properties:/dev/null,server.properties:-',
        'KAFKA_SERVER_ZOOKEEPER_CONNECT=zookeeper:2181/foo/bar',
        'KAFKA_SERVER_ASDF=asdf',
        'env2config', 'inject', str(dest),
    ]
    kafka_conf = str(check_output(cmd))
    assert 'replacing default' in kafka_conf
    assert 'zookeeper.connect=zookeeper:2181/foo/bar' in kafka_conf
    assert 'not matching any default' in kafka_conf
    assert 'asdf=asdf' in kafka_conf
def _futurize_test_script(
    self,
    filename="mytestscript.py",
    stages=(1, 2),
    all_imports=False,
    from3=False,
    conservative=False,
):
    """Run futurize.py (pasteurize.py when *from3*) in-place over *filename*.

    Returns the tool's output; raises FuturizeError/PasteurizeError with
    the file's contents embedded on a non-zero exit.
    """
    stages = list(stages)
    script = "pasteurize.py" if from3 else "futurize.py"
    params = []
    if all_imports:
        params.append("--all-imports")
    if stages == [1]:
        params.append("--stage1")
    elif stages == [2]:
        params.append("--stage2")
    else:
        assert stages == [1, 2]
    if conservative:
        params.append("--conservative")
    # No extra params needed

    # Absolute file path:
    fn = self.tempdir + filename
    call_args = [sys.executable, script] + params + ["-w", fn]
    try:
        return check_output(call_args, stderr=STDOUT, env=self.env)
    except CalledProcessError as e:
        with open(fn) as f:
            contents = f.read()
        msg = ("Error running the command %s\n"
               "%s\n"
               "Contents of file %s:\n"
               "\n"
               "%s") % (
            " ".join(call_args),
            "env=%s" % self.env,
            fn,
            "----\n%s\n----" % contents,
        )
        ErrorClass = FuturizeError if "futurize" in script else PasteurizeError
        raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
def _run_test_script(self, filename='mytestscript.py', interpreter=sys.executable): # Absolute file path: fn = self.tempdir + filename try: output = check_output([interpreter, fn], env=self.env, stderr=STDOUT) except CalledProcessError as e: msg = ('Error running the command %s\n%s\nContents of file %s:\n\n%s' % (' '.join([interpreter, fn]), 'env=%s' % self.env, fn, '----\n%s\n----' % open(fn).read(), ) ) raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) return output
def test_redis_build_and_inject(tmpdir):
    """Inject env vars into a built redis default config; check the output."""
    dest = test_redis_build(tmpdir)
    cmd = [
        'env',
        'ENV_INJECT=redis.conf:-',
        'REDIS_APPENDONLY=yes',
        'REDIS_ASDF=asdf',
        'env2config', 'inject', str(dest),
    ]
    redis_conf = str(check_output(cmd))
    assert 'replacing default' in redis_conf
    assert 'appendonly yes' in redis_conf
    assert 'not matching any default' in redis_conf
    assert 'asdf asdf' in redis_conf
def test_redis_build_and_inject(tmpdir):
    """Run env2config inject against a built redis config and verify markers."""
    dest = test_redis_build(tmpdir)
    injected = check_output([
        'env',
        'ENV_INJECT=redis.conf:-',
        'REDIS_APPENDONLY=yes',
        'REDIS_ASDF=asdf',
        'env2config',
        'inject',
        str(dest),
    ])
    redis_conf = str(injected)
    # Known setting overrides the default; unknown one is appended verbatim.
    assert 'replacing default' in redis_conf
    assert 'appendonly yes' in redis_conf
    assert 'not matching any default' in redis_conf
    assert 'asdf asdf' in redis_conf
def check_command_output(command, log, env=None, raise_on_error=True):
    """
    Execute shell command and retrieve command output.

    :param command: command to execute
    :param env: a dictionary containing environment variables
    :param log: logger
    :param raise_on_error: True to raise subprocess.CalledProcessError on errors
    :return: the command output
    :raise: subprocess.CalledProcessError if the command fails
    """
    def _capture(_command, _env):
        # Merge stderr into the captured output and decode to text.
        return check_output(_command, env=_env, stderr=subprocess.STDOUT,
                            universal_newlines=True)

    return _run_command(_capture, command, log, env, raise_on_error)
def test_hadoop_build_and_inject(tmpdir):
    """Inject env vars into every built hadoop XML config and verify all three."""
    dest = test_hadoop_build(tmpdir)
    cmd = [
        'env',
        'ENV_INJECT=*.xml:-',
        'HADOOP_HDFS_DFS_NAMENODE_RPC-ADDRESS=wacky-namenode',
        'HADOOP_YARN_YARN_RESOURCEMANAGER_ADDRESS=wacky-resman',
        'HADOOP_MAPRED_MAPREDUCE_JOBTRACKER_JOBHISTORY_LOCATION=wacky-mrhistory',
        'env2config', 'inject', str(dest),
    ]
    hadoop_confs = str(check_output(cmd))
    for marker in ('wacky-namenode', 'wacky-resman', 'wacky-mrhistory'):
        assert marker in hadoop_confs
    # One injection banner per XML file (hdfs, yarn, mapred).
    assert len(re.findall('Injected by env2config', hadoop_confs)) == 3
def check_command_output(command, env=None, raise_on_error=True):
    """
    Execute shell command and retrieve command output.

    :param command: command to execute
    :param env: a dictionary containing environment variables
    :param raise_on_error: True to raise subprocess.CalledProcessError on errors
    :return: the command output
    :raise: subprocess.CalledProcessError if the command fails
    """
    def _capture(_command, _env):
        # stderr folded into the return value; universal_newlines gives text.
        return check_output(_command, env=_env, stderr=subprocess.STDOUT,
                            universal_newlines=True)

    return _run_command(_capture, command, env, raise_on_error)
def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
                          all_imports=False, from3=False,
                          conservative=False):
    """Apply futurize/pasteurize in-place to *filename*; return tool output.

    A failing run is re-raised as FuturizeError or PasteurizeError with the
    (possibly partially converted) file contents included in the message.
    """
    params = []
    stages = list(stages)
    if all_imports:
        params.append('--all-imports')
    if from3:
        script = 'pasteurize.py'
    else:
        script = 'futurize.py'
    if stages == [1]:
        params.append('--stage1')
    elif stages == [2]:
        params.append('--stage2')
    else:
        assert stages == [1, 2]
    if conservative:
        params.append('--conservative')
    # No extra params needed

    # Absolute file path:
    fn = self.tempdir + filename
    call_args = [sys.executable, script] + params + ['-w', fn]
    try:
        output = check_output(call_args, stderr=STDOUT, env=self.env)
    except CalledProcessError as e:
        with open(fn) as f:
            body = f.read()
        msg = ('Error running the command %s\n'
               '%s\n'
               'Contents of file %s:\n'
               '\n'
               '%s') % (' '.join(call_args),
                        'env=%s' % self.env,
                        fn,
                        '----\n%s\n----' % body)
        if 'futurize' in script:
            ErrorClass = FuturizeError
        else:
            ErrorClass = PasteurizeError
        raise ErrorClass(msg, e.returncode, e.cmd, output=e.output)
    return output
def deploy(dir, docker_tag): """ Deploys ML models(s) locally :param dir: [str], source root directory :param docker_tag: [str], the Docker tag for the image """ sagify_module_path = os.path.join(dir, 'sagify') local_deploy_script_path = os.path.join(sagify_module_path, 'local_test', 'deploy_local.sh') test_path = os.path.join(sagify_module_path, 'local_test', 'test_dir') if not os.path.isdir(test_path): raise ValueError("This is not a sagify directory: {}".format(dir)) output = subprocess.check_output([ "{}".format(local_deploy_script_path), "{}".format(os.path.abspath(test_path)), docker_tag ]) logger.debug(output)
def train(dir, docker_tag, image_name): """ Trains ML model(s) locally :param dir: [str], source root directory :param docker_tag: [str], the Docker tag for the image :param image_name: [str], The name of the Docker image """ sagify_module_path = os.path.join(dir, 'sagify') local_train_script_path = os.path.join(sagify_module_path, 'local_test', 'train_local.sh') test_path = os.path.join(sagify_module_path, 'local_test', 'test_dir') if not os.path.isdir(test_path): raise ValueError("This is not a sagify directory: {}".format(dir)) output = subprocess.check_output([ "{}".format(local_train_script_path), "{}".format(os.path.abspath(test_path)), docker_tag, image_name ]) logger.debug(output)
def test_hadoop_build_and_inject(tmpdir):
    """Inject env vars across the built hadoop XML configs; verify all three."""
    dest = test_hadoop_build(tmpdir)
    env_pairs = [
        "ENV_INJECT=*.xml:-",
        "HADOOP_HDFS_DFS_NAMENODE_RPC-ADDRESS=wacky-namenode",
        "HADOOP_YARN_YARN_RESOURCEMANAGER_ADDRESS=wacky-resman",
        "HADOOP_MAPRED_MAPREDUCE_JOBTRACKER_JOBHISTORY_LOCATION=wacky-mrhistory",
    ]
    hadoop_confs = str(
        check_output(["env"] + env_pairs + ["env2config", "inject", str(dest)])
    )
    assert "wacky-namenode" in hadoop_confs
    assert "wacky-resman" in hadoop_confs
    assert "wacky-mrhistory" in hadoop_confs
    # One injection banner per XML file (hdfs, yarn, mapred).
    assert len(re.findall("Injected by env2config", hadoop_confs)) == 3
def _run_test_script(self, filename='mytestscript.py', interpreter=sys.executable): # Absolute file path: fn = self.tempdir + filename try: output = check_output([interpreter, fn], env=self.env, stderr=STDOUT) except CalledProcessError as e: msg = ( 'Error running the command %s\n%s\nContents of file %s:\n\n%s' % ( ' '.join([interpreter, fn]), 'env=%s' % self.env, fn, '----\n%s\n----' % open(fn).read(), )) ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError) raise ErrorClass(msg, e.returncode, e.cmd) return output
secFiles = cms.untracked.vstring() eventsToProcess = [] # Query DAS for a ROOT file for every lumisection for ls in range(options.minLumi, options.maxLumi + 1): if fnmatch.fnmatch(str(ls), options.lumiPattern): read, sec = filesFromDASQuery("file run=%d dataset=%s lumi=%s" % (options.runNumber, options.dataset, ls)) readFiles.extend(read) secFiles.extend(sec) # Get last eventsPerLumi of events in this file command = "edmFileUtil --catalog file:/cvmfs/cms-ib.cern.ch/SITECONF/local/PhEDEx/storage.xml?protocol=xrootd --events %s | tail -n +9 | head -n -5 | awk '{ print $3 }'" % read[ 0] print(command) events = subprocess.check_output(command, shell=True) events = events.split(b'\n') events = filter(lambda x: x != b"", events) events = map(int, events) events = sorted(events) events = events[-options.eventsPerLumi:] eventsToProcess.append("%s:%s:%s-%s:%s:%s" % (options.runNumber, ls, events[0], options.runNumber, ls, events[-1])) eventRange = cms.untracked.VEventRange(eventsToProcess) print("Got %d files." % len(readFiles)) source = cms.Source("PoolSource", fileNames=readFiles,