Beispiel #1
0
def train(dir):
    """
    Command to train ML model(s) locally.

    Logs the banner, verifies that ``<dir>/sagify/local_test/test_dir``
    exists (logging a message and exiting with status -1 when it does not),
    then runs ``train_local.sh`` with the absolute test directory as its
    only argument. Any exception raised while running the script is logged
    and swallowed.

    :param dir: [str], source root directory
    """
    logger.info(ASCII_LOGO)
    logger.info("Started local training...\n")

    local_test_root = os.path.join(dir, 'sagify', 'local_test')
    train_script = os.path.join(local_test_root, 'train_local.sh')
    test_dir = os.path.join(local_test_root, 'test_dir')

    if not os.path.isdir(test_dir):
        logger.info("This is not a sagify directory: {}".format(dir))
        sys.exit(-1)

    try:
        subprocess.check_output([train_script, os.path.abspath(test_dir)])
        logger.info("Local training completed successfully!")
    except Exception as e:
        # Best-effort CLI command: report the failure, do not propagate it.
        logger.info("{}".format(e))
Beispiel #2
0
 def _run_test_script(self,
                      filename='mytestscript.py',
                      interpreter=sys.executable):
     """
     Run a script located under ``self.tempdir`` and return its output.

     :param filename: name appended to ``self.tempdir`` to locate the script
     :param interpreter: executable used to run the script
     :return: combined stdout/stderr of the run, as given by ``check_output``
     :raise VerboseCalledProcessError: when ``check_output`` raises
         ``CalledProcessError``; the message carries the command, the
         environment and the contents of the script
     """
     script_path = self.tempdir + filename
     command = [interpreter, script_path]
     try:
         return check_output(command, env=self.env, stderr=STDOUT)
     except CalledProcessError as err:
         with open(script_path) as script_file:
             details = (
                 ' '.join(command),
                 'env=%s' % self.env,
                 script_path,
                 '----\n%s\n----' % script_file.read(),
             )
         msg = ('Error running the command %s\n%s\n'
                'Contents of file %s:\n\n%s') % details
         # CalledProcessError.output doesn't exist on Py2.6, hence the default.
         captured = getattr(err, 'output', None)
         raise VerboseCalledProcessError(msg,
                                         err.returncode,
                                         err.cmd,
                                         output=captured)
Beispiel #3
0
def build(dir, requirements_dir, docker_tag):
    """
    Builds a Docker image that contains code under the given source root directory.

    Assumes that Docker is installed and running locally.

    :param dir: [str], source root directory
    :param requirements_dir: [str], path to requirements.txt
    :param docker_tag: [str], forwarded to build.sh as its last argument
        (presumably the tag given to the built image)

    :raise ValueError: if build.sh, training/train, prediction/serve or
        executor.sh is missing under ``<dir>/sagify``
    :raise subprocess.CalledProcessError: if build.sh exits with a non-zero
        status
    """
    sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/'))

    build_script_path = os.path.join(sagify_module_path, 'build.sh')
    dockerfile_path = os.path.join(sagify_module_path, 'Dockerfile')

    train_file_path = os.path.join(sagify_module_path, 'training', 'train')
    serve_file_path = os.path.join(sagify_module_path, 'prediction', 'serve')
    executor_file_path = os.path.join(sagify_module_path, 'executor.sh')

    # executor.sh is chmod-ed below, so it has to be validated here as well;
    # otherwise a missing file surfaces as a FileNotFoundError after the other
    # files have already been modified.
    required_files = (build_script_path, train_file_path, serve_file_path,
                      executor_file_path)
    if not all(os.path.isfile(path) for path in required_files):
        raise ValueError("This is not a sagify directory: {}".format(dir))

    for executable_path in (train_file_path, serve_file_path,
                            executor_file_path):
        os.chmod(executable_path, 0o777)

    target_dir_name = os.path.basename(os.path.normpath(dir))

    output = subprocess.check_output([
        "{}".format(build_script_path), "{}".format(os.path.relpath(dir)),
        "{}".format(os.path.relpath(target_dir_name)),
        "{}".format(dockerfile_path),
        "{}".format(os.path.relpath(requirements_dir)), docker_tag
    ])
    logger.debug(output)
Beispiel #4
0
def push(dir, docker_tag, aws_region, iam_role_arn, aws_profile, external_id,
         image_name):
    """
    Push Docker image to AWS ECS by running the project's ``push.sh``.

    All arguments after ``dir`` are forwarded to the script positionally, in
    the order they appear in the signature.

    :param dir: [str], source root directory
    :param docker_tag: [str], the Docker tag for the image
    :param aws_region: [str], the AWS region to push the image to
    :param iam_role_arn: [str], the AWS role used to push the image to ECR
    :param aws_profile: [str], the AWS profile used to push the image to ECR
    :param external_id: [str], Optional external id used when using an IAM role
    :param image_name: [str], The name of the Docker image

    :raise ValueError: if ``<dir>/sagify/push.sh`` is not a file
    """
    script = os.path.join(
        os.path.relpath(os.path.join(dir, 'sagify/')), 'push.sh')

    if not os.path.isfile(script):
        raise ValueError("This is not a sagify directory: {}".format(dir))

    script_args = [
        docker_tag,
        aws_region,
        iam_role_arn,
        aws_profile,
        external_id,
        image_name,
    ]
    result = subprocess.check_output([script] + script_args)
    logger.debug(result)
Beispiel #5
0
 def _run_test_script(self, filename='mytestscript.py',
                      interpreter=sys.executable):
     """
     Execute ``self.tempdir + filename`` with *interpreter*.

     The child process receives ``self.env`` as its environment and has its
     stderr merged into stdout.

     :return: the captured output from ``check_output``
     :raise VerboseCalledProcessError: when ``check_output`` raises
         ``CalledProcessError``; the message includes the command line, the
         environment and the script source
     """
     path = self.tempdir + filename
     argv = [interpreter, path]
     try:
         result = check_output(argv, env=self.env, stderr=STDOUT)
     except CalledProcessError as failure:
         with open(path) as handle:
             template = ('Error running the command %s\n%s\n'
                         'Contents of file %s:\n\n%s')
             msg = template % (
                 ' '.join(argv),
                 'env=%s' % self.env,
                 path,
                 '----\n%s\n----' % handle.read(),
             )
         # CalledProcessError.output doesn't exist on Py2.6, hence the default.
         raise VerboseCalledProcessError(msg, failure.returncode, failure.cmd,
                                         output=getattr(failure, 'output', None))
     return result
Beispiel #6
0
def download_apk(storage, handle, tmp_dir, apk_name, apk_tmp):
    """
    Download the APK from Google Play for the given handle.

    ``gplaycli`` is tried with several device code names, a fixed number of
    times per device. After the first failed attempt the token is removed
    once via ``remove_token()``; every failed attempt is followed by a
    two-second pause.

    :param storage: minio storage helper
    :param handle: application handle to download
    :param tmp_dir: download destination directory
    :param apk_name: name of the APK in Minio storage
    :param apk_tmp: apk temporary name
    :return: True once the APK has been stored; False when the upload raises
        ``ResponseError``, when gplaycli times out, or when every attempt
        has failed
    """
    device_code_names = ('bacon', 'hammerhead', 'manta', 'cloudbook',
                         'bullhead')
    retry_per_device = 3
    # This particular message is tolerated; any other [ERROR] marker in the
    # output makes the attempt count as failed.
    tolerated_error = '[ERROR] cache file does not exists or is corrupted'

    token_already_refreshed = False
    for device in device_code_names:
        cmd = 'gplaycli -v -a -y -pd {} -dc {} -f {}/'.format(
            handle, device, tmp_dir)
        for _attempt in range(retry_per_device):
            try:
                raw_output = subprocess.check_output(
                    shlex.split(cmd),
                    stderr=subprocess.STDOUT,
                    timeout=240  # Timeout of 4 minutes
                )
                output_str = raw_output.decode('utf-8')
                logging.info(output_str)
                if '[ERROR]' in output_str.replace(tolerated_error, ''):
                    raise RuntimeError('Error while downloading apk file')

                if Path(apk_tmp).is_file():
                    try:
                        storage.put_file(apk_tmp, apk_name)
                    except ResponseError as err:
                        logging.info(err)
                        return False
                    return True

            except TimeoutExpired:
                logging.warning("Timeout of gplaycli download command")
                return False
            except Exception as e:
                # Any other failure only costs this attempt.
                logging.info(e)

            logging.info("Could not download with device {}".format(device))
            if not token_already_refreshed:
                remove_token()
                token_already_refreshed = True
            time.sleep(2)

    return False
Beispiel #7
0
def build(dir, requirements_dir):
    """
    Command to build SageMaker app.

    Validates the sagify layout under ``<dir>/sagify``, makes the train,
    serve and executor files executable, then runs ``build.sh``. If the
    layout is incomplete a message is logged and the process exits with
    status -1. Any exception raised while running ``build.sh`` is logged and
    swallowed.

    :param dir: [str], source root directory
    :param requirements_dir: [str], path to requirements.txt
    """
    logger.info(ASCII_LOGO)
    logger.info(
        "Started building SageMaker Docker image. It will take some minutes...\n"
    )

    sagify_module_path = os.path.relpath(os.path.join(dir, 'sagify/'))

    build_script_path = os.path.join(sagify_module_path, 'build.sh')
    dockerfile_path = os.path.join(sagify_module_path, 'Dockerfile')

    train_file_path = os.path.join(sagify_module_path, 'training', 'train')
    serve_file_path = os.path.join(sagify_module_path, 'prediction', 'serve')
    executor_file_path = os.path.join(sagify_module_path, 'executor.sh')

    # executor.sh is chmod-ed below, so it has to be validated here as well;
    # otherwise a missing file surfaces as an uncaught FileNotFoundError
    # after the other files have already been modified.
    required_files = (build_script_path, train_file_path, serve_file_path,
                      executor_file_path)
    if not all(os.path.isfile(path) for path in required_files):
        logger.info("This is not a sagify directory: {}".format(dir))
        sys.exit(-1)

    for executable_path in (train_file_path, serve_file_path,
                            executor_file_path):
        os.chmod(executable_path, 0o777)

    target_dir_name = os.path.basename(os.path.normpath(dir))

    try:
        subprocess.check_output([
            "{}".format(build_script_path), "{}".format(os.path.relpath(dir)),
            "{}".format(os.path.relpath(target_dir_name)),
            "{}".format(dockerfile_path),
            "{}".format(os.path.relpath(requirements_dir))
        ])

        logger.info("Docker image built successfully!")
    except Exception as e:
        # Best-effort CLI command: report the failure, do not propagate it.
        logger.info("{}".format(e))
        return
Beispiel #8
0
    def _futurize_test_script(self,
                              filename='mytestscript.py',
                              stages=(1, 2),
                              all_imports=False,
                              from3=False,
                              conservative=False):
        """
        Run futurize.py (or pasteurize.py when *from3* is true) on a script.

        The script at ``self.tempdir + filename`` is rewritten in place
        (``-w``). ``--stage1``/``--stage2`` and ``--conservative`` are only
        passed to futurize.py; ``--all-imports`` applies to both tools.

        :param stages: ``(1,)``, ``(2,)`` or ``(1, 2)``; anything else trips
            an assertion when futurize.py is selected
        :return: combined stdout/stderr of the tool
        :raise FuturizeError: if futurize.py fails
        :raise PasteurizeError: if pasteurize.py fails
        """
        stages = list(stages)
        params = ['--all-imports'] if all_imports else []

        use_futurize = not from3
        script = 'futurize.py' if use_futurize else 'pasteurize.py'
        if use_futurize:
            if stages == [1]:
                params.append('--stage1')
            elif stages == [2]:
                params.append('--stage2')
            else:
                # Running both stages needs no extra flag.
                assert stages == [1, 2]
            if conservative:
                params.append('--conservative')

        target = self.tempdir + filename
        call_args = [sys.executable, script] + params + ['-w', target]
        try:
            return check_output(call_args, stderr=STDOUT, env=self.env)
        except CalledProcessError as err:
            with open(target) as source_file:
                details = (
                    ' '.join(call_args),
                    'env=%s' % self.env,
                    target,
                    '----\n%s\n----' % source_file.read(),
                )
            msg = ('Error running the command %s\n%s\n'
                   'Contents of file %s:\n\n%s') % details
            ErrorClass = FuturizeError if use_futurize else PasteurizeError

            # CalledProcessError.output doesn't exist on Py2.6.
            captured = getattr(err, 'output', None)
            raise ErrorClass(msg, err.returncode, err.cmd, output=captured)
Beispiel #9
0
def push(dir):
    """
    Command to push Docker image to AWS ECS.

    Runs ``<dir>/sagify/push.sh`` without arguments. When the script is
    missing a message is logged and the process exits with status -1. Any
    exception raised while running the script is logged and swallowed.

    :param dir: [str], source root directory
    """
    logger.info(ASCII_LOGO)
    logger.info(
        "Started pushing Docker image to AWS ECS. It will take some time. Please, be patient...\n"
    )

    script = os.path.join(
        os.path.relpath(os.path.join(dir, 'sagify/')), 'push.sh')

    if not os.path.isfile(script):
        logger.info("This is not a sagify directory: {}".format(dir))
        sys.exit(-1)

    try:
        subprocess.check_output([script])
        logger.info("Docker image pushed to ECS successfully!")
    except Exception as e:
        # Best-effort CLI command: report the failure, do not propagate it.
        logger.info("{}".format(e))
Beispiel #10
0
def push(dir, docker_tag):
    """
    Push Docker image to AWS ECS by running the project's ``push.sh``.

    :param dir: [str], source root directory
    :param docker_tag: [str], passed to push.sh as its only argument

    :raise ValueError: if ``<dir>/sagify/push.sh`` is not a file
    """
    script = os.path.join(
        os.path.relpath(os.path.join(dir, 'sagify/')), 'push.sh')

    if not os.path.isfile(script):
        raise ValueError("This is not a sagify directory: {}".format(dir))

    result = subprocess.check_output([script, docker_tag])
    logger.debug(result)
Beispiel #11
0
def deploy(dir):
    """
    Command to deploy ML model(s) locally.

    Logs the banner, verifies that ``<dir>/sagify/local_test/test_dir``
    exists (logging a message and exiting with status -1 when it does not),
    then runs ``deploy_local.sh`` with the absolute test directory as its
    only argument. Any exception raised while running the script is logged
    and swallowed.

    :param dir: [str], source root directory
    """
    logger.info(ASCII_LOGO)
    logger.info("Started local deployment at localhost:8080 ...\n")

    local_test_root = os.path.join(dir, 'sagify', 'local_test')
    deploy_script = os.path.join(local_test_root, 'deploy_local.sh')
    test_dir = os.path.join(local_test_root, 'test_dir')

    if not os.path.isdir(test_dir):
        logger.info("This is not a sagify directory: {}".format(dir))
        sys.exit(-1)

    try:
        subprocess.check_output([deploy_script, os.path.abspath(test_dir)])
    except Exception as e:
        # Best-effort CLI command: report the failure, do not propagate it.
        logger.info("{}".format(e))
Beispiel #12
0
def load_kubeconf(config_file):
    """
    Load a kubeconfig file and install it as the default client configuration.

    The ``exec`` credential command of the user named ``aws`` is run
    eagerly. The token found at ``status.token`` in its JSON output is
    stored on that user entry and the ``exec`` section is removed before the
    configuration is handed to ``KubeConfigLoader``.

    :param config_file: path to the kubeconfig YAML file
    :raise IndexError: if the file declares no user named ``aws``
    :raise subprocess.CalledProcessError: if the credential command exits
        with a non-zero status
    """
    with open(config_file, 'r') as f:
        # safe_load instead of yaml.load: a kubeconfig is plain data, and
        # yaml.load without an explicit Loader can construct arbitrary
        # objects (and is rejected outright by PyYAML >= 6).
        config = yaml.safe_load(f)

    user = [user for user in config['users']
            if user['name'] == 'aws'][0]['user']

    # Resolve the exec-based credential up front and replace it with the
    # resulting static token.
    command = [user['exec']['command']]
    command.extend(user['exec']['args'])
    output = subprocess.check_output(command)
    c = json.loads(output.decode('utf-8'))
    user['token'] = c['status']['token']
    del user['exec']

    loader = KubeConfigLoader(config)
    # NOTE(review): type.__call__ bypasses any metaclass __call__ defined for
    # Configuration -- presumably to obtain a fresh instance; confirm.
    config = type.__call__(Configuration)
    loader.load_and_set(config)
    Configuration.set_default(config)
Beispiel #13
0
def test_kafka_build_and_inject(tmpdir):
    """
    Run ``env2config inject`` on the kafka build output and check its report.

    The destination comes from ``test_kafka_build(tmpdir)``; the overrides
    are supplied through the ``env`` command.
    """
    dest = test_kafka_build(tmpdir)

    command = [
        'env',
        'ENV_INJECT=*.properties:/dev/null,server.properties:-',
        'KAFKA_SERVER_ZOOKEEPER_CONNECT=zookeeper:2181/foo/bar',
        'KAFKA_SERVER_ASDF=asdf',
        'env2config',
        'inject',
        str(dest),
    ]
    kafka_conf = str(check_output(command))

    expected_fragments = (
        'replacing default',
        'zookeeper.connect=zookeeper:2181/foo/bar',
        'not matching any default',
        'asdf=asdf',
    )
    for fragment in expected_fragments:
        assert fragment in kafka_conf
Beispiel #14
0
    def _futurize_test_script(
            self,
            filename="mytestscript.py",
            stages=(1, 2),
            all_imports=False,
            from3=False,
            conservative=False,
    ):
        """
        Run futurize.py (or pasteurize.py when *from3* is true) on a script.

        The script at ``self.tempdir + filename`` is rewritten in place
        (``-w``). ``--stage1``/``--stage2`` and ``--conservative`` are only
        passed to futurize.py; ``--all-imports`` applies to both tools.

        :param stages: ``(1,)``, ``(2,)`` or ``(1, 2)``; anything else trips
            an assertion when futurize.py is selected
        :return: combined stdout/stderr of the tool
        :raise FuturizeError: if futurize.py fails
        :raise PasteurizeError: if pasteurize.py fails
        """
        stages = list(stages)
        params = ["--all-imports"] if all_imports else []

        running_futurize = not from3
        script = "futurize.py" if running_futurize else "pasteurize.py"
        if running_futurize:
            if stages == [1]:
                params.append("--stage1")
            elif stages == [2]:
                params.append("--stage2")
            else:
                # Running both stages needs no extra flag.
                assert stages == [1, 2]
            if conservative:
                params.append("--conservative")

        target = self.tempdir + filename
        call_args = [sys.executable, script] + params + ["-w", target]
        try:
            return check_output(call_args, stderr=STDOUT, env=self.env)
        except CalledProcessError as err:
            with open(target) as source_file:
                details = (
                    " ".join(call_args),
                    "env=%s" % self.env,
                    target,
                    "----\n%s\n----" % source_file.read(),
                )
            msg = ("Error running the command %s\n%s\n"
                   "Contents of file %s:\n\n%s") % details
            ErrorClass = FuturizeError if running_futurize else PasteurizeError
            raise ErrorClass(msg, err.returncode, err.cmd, output=err.output)
Beispiel #15
0
 def _run_test_script(self, filename='mytestscript.py',
                      interpreter=sys.executable):
     """
     Run a script located under ``self.tempdir`` and return its output.

     :param filename: name appended to ``self.tempdir`` to locate the script
     :param interpreter: executable used to run the script
     :return: combined stdout/stderr of the run, as given by ``check_output``
     :raise VerboseCalledProcessError: when ``check_output`` raises
         ``CalledProcessError``; the message carries the command, the
         environment and the contents of the script
     """
     # Absolute file path:
     fn = self.tempdir + filename
     try:
         output = check_output([interpreter, fn],
                               env=self.env, stderr=STDOUT)
     except CalledProcessError as e:
         # Read the script through a context manager so the handle is closed
         # deterministically instead of being left to the garbage collector.
         with open(fn) as f:
             contents = f.read()
         msg = ('Error running the command %s\n%s\nContents of file %s:\n\n%s' %
                (' '.join([interpreter, fn]),
                 'env=%s' % self.env,
                 fn,
                 '----\n%s\n----' % contents,
                 )
                )
         raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output)
     return output
Beispiel #16
0
def test_redis_build_and_inject(tmpdir):
    """
    Run ``env2config inject`` on the redis build output and check its report.

    The destination comes from ``test_redis_build(tmpdir)``; the overrides
    are supplied through the ``env`` command.
    """
    dest = test_redis_build(tmpdir)

    command = [
        'env',
        'ENV_INJECT=redis.conf:-',
        'REDIS_APPENDONLY=yes',
        'REDIS_ASDF=asdf',
        'env2config',
        'inject',
        str(dest),
    ]
    redis_conf = str(check_output(command))

    expected_fragments = (
        'replacing default',
        'appendonly yes',
        'not matching any default',
        'asdf asdf',
    )
    for fragment in expected_fragments:
        assert fragment in redis_conf
Beispiel #17
0
def test_redis_build_and_inject(tmpdir):
    """
    Inject redis overrides with env2config and verify the printed result.

    ``test_redis_build(tmpdir)`` provides the destination passed to
    ``env2config inject``.
    """
    dest = test_redis_build(tmpdir)

    env_assignments = [
        'ENV_INJECT=redis.conf:-',
        'REDIS_APPENDONLY=yes',
        'REDIS_ASDF=asdf',
    ]
    inject_command = ['env2config', 'inject', str(dest)]
    redis_conf = str(check_output(['env'] + env_assignments + inject_command))

    # A known setting is replaced ...
    assert 'replacing default' in redis_conf
    assert 'appendonly yes' in redis_conf
    # ... and an unknown one is reported and still written.
    assert 'not matching any default' in redis_conf
    assert 'asdf asdf' in redis_conf
Beispiel #18
0
def check_command_output(command, log, env=None, raise_on_error=True):
    """
    Run *command* through ``_run_command`` using ``check_output``.

    The runner handed to ``_run_command`` merges stderr into stdout and
    returns text (``universal_newlines=True``).

    :param command: command to execute
    :param log: logger
    :param env: a dictionary containing environment variables
    :param raise_on_error: True to raise subprocess.CalledProcessError on errors
    :return: the command output
    :raise: subprocess.CalledProcessError if the command fails
    """
    def _capture(_command, _env):
        return check_output(
            _command,
            env=_env,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

    return _run_command(_capture, command, log, env, raise_on_error)
Beispiel #19
0
def test_hadoop_build_and_inject(tmpdir):
    """
    Run ``env2config inject`` on the hadoop build output and check its report.

    The destination comes from ``test_hadoop_build(tmpdir)``. Each of the
    three overridden values must show up in the output, and the marker
    'Injected by env2config' must occur exactly three times.
    """
    dest = test_hadoop_build(tmpdir)

    command = [
        'env',
        'ENV_INJECT=*.xml:-',
        'HADOOP_HDFS_DFS_NAMENODE_RPC-ADDRESS=wacky-namenode',
        'HADOOP_YARN_YARN_RESOURCEMANAGER_ADDRESS=wacky-resman',
        'HADOOP_MAPRED_MAPREDUCE_JOBTRACKER_JOBHISTORY_LOCATION=wacky-mrhistory',
        'env2config',
        'inject',
        str(dest),
    ]
    hadoop_confs = str(check_output(command))

    for injected_value in ('wacky-namenode', 'wacky-resman', 'wacky-mrhistory'):
        assert injected_value in hadoop_confs

    marker_hits = re.findall('Injected by env2config', hadoop_confs)
    assert len(marker_hits) == 3
def check_command_output(command, env=None, raise_on_error=True):
    """
    Run *command* through ``_run_command`` using ``check_output``.

    The runner handed to ``_run_command`` merges stderr into stdout and
    returns text (``universal_newlines=True``).

    :param command: command to execute
    :param env: a dictionary containing environment variables
    :param raise_on_error: True to raise subprocess.CalledProcessError on errors
    :return: the command output
    :raise: subprocess.CalledProcessError if the command fails
    """
    def _capture(_command, _env):
        return check_output(
            _command,
            env=_env,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

    return _run_command(_capture, command, env, raise_on_error)
Beispiel #21
0
    def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2),
                              all_imports=False, from3=False,
                              conservative=False):
        """
        Run futurize.py (or pasteurize.py when *from3* is true) on a script.

        The script at ``self.tempdir + filename`` is rewritten in place
        (``-w``). ``--stage1``/``--stage2`` and ``--conservative`` are only
        passed to futurize.py; ``--all-imports`` applies to both tools.

        :param stages: ``(1,)``, ``(2,)`` or ``(1, 2)``; anything else trips
            an assertion when futurize.py is selected
        :return: combined stdout/stderr of the tool
        :raise FuturizeError: if futurize.py fails
        :raise PasteurizeError: if pasteurize.py fails
        """
        stages = list(stages)
        params = ['--all-imports'] if all_imports else []

        is_futurize = not from3
        script = 'futurize.py' if is_futurize else 'pasteurize.py'
        if is_futurize:
            if stages == [1]:
                params.append('--stage1')
            elif stages == [2]:
                params.append('--stage2')
            else:
                # Running both stages needs no extra flag.
                assert stages == [1, 2]
            if conservative:
                params.append('--conservative')

        target = self.tempdir + filename
        call_args = [sys.executable, script] + params + ['-w', target]
        try:
            return check_output(call_args, stderr=STDOUT, env=self.env)
        except CalledProcessError as err:
            with open(target) as source_file:
                details = (
                    ' '.join(call_args),
                    'env=%s' % self.env,
                    target,
                    '----\n%s\n----' % source_file.read(),
                )
            msg = ('Error running the command %s\n%s\n'
                   'Contents of file %s:\n\n%s') % details
            ErrorClass = FuturizeError if is_futurize else PasteurizeError
            raise ErrorClass(msg, err.returncode, err.cmd, output=err.output)
Beispiel #22
0
def deploy(dir, docker_tag):
    """
    Deploys ML models(s) locally by running ``deploy_local.sh``.

    The script receives the absolute path of
    ``<dir>/sagify/local_test/test_dir`` followed by ``docker_tag``.

    :param dir: [str], source root directory
    :param docker_tag: [str], the Docker tag for the image

    :raise ValueError: if ``<dir>/sagify/local_test/test_dir`` is not a
        directory
    """
    local_test_root = os.path.join(dir, 'sagify', 'local_test')
    deploy_script = os.path.join(local_test_root, 'deploy_local.sh')
    test_dir = os.path.join(local_test_root, 'test_dir')

    if not os.path.isdir(test_dir):
        raise ValueError("This is not a sagify directory: {}".format(dir))

    command = [deploy_script, os.path.abspath(test_dir), docker_tag]
    result = subprocess.check_output(command)
    logger.debug(result)
Beispiel #23
0
def train(dir, docker_tag, image_name):
    """
    Trains ML model(s) locally by running ``train_local.sh``.

    The script receives the absolute path of
    ``<dir>/sagify/local_test/test_dir`` followed by ``docker_tag`` and
    ``image_name``.

    :param dir: [str], source root directory
    :param docker_tag: [str], the Docker tag for the image
    :param image_name: [str], The name of the Docker image

    :raise ValueError: if ``<dir>/sagify/local_test/test_dir`` is not a
        directory
    """
    local_test_root = os.path.join(dir, 'sagify', 'local_test')
    train_script = os.path.join(local_test_root, 'train_local.sh')
    test_dir = os.path.join(local_test_root, 'test_dir')

    if not os.path.isdir(test_dir):
        raise ValueError("This is not a sagify directory: {}".format(dir))

    command = [
        train_script,
        os.path.abspath(test_dir),
        docker_tag,
        image_name,
    ]
    result = subprocess.check_output(command)
    logger.debug(result)
Beispiel #24
0
def test_hadoop_build_and_inject(tmpdir):
    """
    Inject hadoop overrides with env2config and verify the printed result.

    ``test_hadoop_build(tmpdir)`` provides the destination passed to
    ``env2config inject``. All three overridden values must appear, and the
    marker "Injected by env2config" must occur exactly three times.
    """
    dest = test_hadoop_build(tmpdir)

    env_assignments = [
        "ENV_INJECT=*.xml:-",
        "HADOOP_HDFS_DFS_NAMENODE_RPC-ADDRESS=wacky-namenode",
        "HADOOP_YARN_YARN_RESOURCEMANAGER_ADDRESS=wacky-resman",
        "HADOOP_MAPRED_MAPREDUCE_JOBTRACKER_JOBHISTORY_LOCATION=wacky-mrhistory",
    ]
    inject_command = ["env2config", "inject", str(dest)]
    hadoop_confs = str(check_output(["env"] + env_assignments + inject_command))

    assert "wacky-namenode" in hadoop_confs
    assert "wacky-resman" in hadoop_confs
    assert "wacky-mrhistory" in hadoop_confs

    marker_count = sum(
        1 for _ in re.finditer("Injected by env2config", hadoop_confs))
    assert marker_count == 3
Beispiel #25
0
 def _run_test_script(self,
                      filename='mytestscript.py',
                      interpreter=sys.executable):
     """
     Run a script located under ``self.tempdir`` and return its output.

     :param filename: name appended to ``self.tempdir`` to locate the script
     :param interpreter: executable used to run the script
     :return: combined stdout/stderr of the run, as given by ``check_output``
     :raise VerboseCalledProcessError: when ``check_output`` raises
         ``CalledProcessError``; the message carries the command, the
         environment and the contents of the script
     """
     # Absolute file path:
     fn = self.tempdir + filename
     try:
         output = check_output([interpreter, fn],
                               env=self.env,
                               stderr=STDOUT)
     except CalledProcessError as e:
         # Read the script through a context manager so the handle is closed.
         with open(fn) as f:
             contents = f.read()
         msg = (
             'Error running the command %s\n%s\nContents of file %s:\n\n%s'
             % (
                 ' '.join([interpreter, fn]),
                 'env=%s' % self.env,
                 fn,
                 '----\n%s\n----' % contents,
             ))
         # The previous handler chose between FuturizeError/PasteurizeError
         # using a name (`script`) that does not exist in this method, so a
         # failing script surfaced as a NameError. Neither tool is involved
         # here; raise the generic verbose error and keep the captured output.
         raise VerboseCalledProcessError(msg,
                                         e.returncode,
                                         e.cmd,
                                         output=getattr(e, 'output', None))
     return output
secFiles = cms.untracked.vstring()
eventsToProcess = []

# For every lumisection matching the pattern: look up its ROOT file via DAS
# and build an event range covering the last eventsPerLumi events.
for ls in range(options.minLumi, options.maxLumi + 1):
    if not fnmatch.fnmatch(str(ls), options.lumiPattern):
        continue

    das_query = "file run=%d dataset=%s lumi=%s" % (
        options.runNumber, options.dataset, ls)
    read, sec = filesFromDASQuery(das_query)
    readFiles.extend(read)
    secFiles.extend(sec)

    # List the events of the first file returned by the query; the shell
    # pipeline keeps only the third column of the listing's body.
    command = (
        "edmFileUtil --catalog "
        "file:/cvmfs/cms-ib.cern.ch/SITECONF/local/PhEDEx/storage.xml?protocol=xrootd "
        "--events %s | tail -n +9 | head -n -5 | awk '{ print $3 }'"
    ) % read[0]
    print(command)
    raw_listing = subprocess.check_output(command, shell=True)

    # One event number per non-empty output line; keep the highest ones.
    events = sorted(
        int(token) for token in raw_listing.split(b'\n') if token != b"")
    events = events[-options.eventsPerLumi:]

    first_event, last_event = events[0], events[-1]
    eventsToProcess.append(
        "%s:%s:%s-%s:%s:%s" % (options.runNumber, ls, first_event,
                               options.runNumber, ls, last_event))

eventRange = cms.untracked.VEventRange(eventsToProcess)

print("Got %d files." % len(readFiles))

source = cms.Source("PoolSource",
                    fileNames=readFiles,