def _copy_predictive_object_files(source_path, target_path, is_dir, src_credentials, tgt_credentials):
    '''
    Copy either a file or a folder from a source location to a target location.

    The copy strategy depends on where the two paths live:

    * S3 -> S3: the credentials are checked and an intra-S3 copy is issued.
    * local -> anything: delegated to ``_file_util.copy_from_local``.
    * remote (S3 or HDFS) -> anything else: the source is downloaded into a
      temporary local directory and then uploaded to the target. The
      temporary directory is always removed afterwards.

    Parameters
    ----------
    source_path : str
        Location to copy from (local, S3 or HDFS path).
    target_path : str
        Location to copy to (local, S3 or HDFS path). If it is a local path
        that already exists, it is deleted before copying.
    is_dir : bool
        Whether the source is a folder; forwarded to the copy helpers.
    src_credentials
        AWS credentials used when downloading the source from S3.
    tgt_credentials
        AWS credentials used when writing the target to S3.

    Raises
    ------
    RuntimeError
        If the source is neither local, S3 nor HDFS.
    '''
    # Cleanup existing file path if exists. The target may be a plain file or
    # a folder: rmtree only accepts real directories (it raises on regular
    # files and on symlinks), so everything else is removed with os.remove.
    if _file_util.is_local_path(target_path) and _os.path.exists(target_path):
        if _os.path.isdir(target_path) and not _os.path.islink(target_path):
            _shutil.rmtree(target_path)
        else:
            _os.remove(target_path)

    if _file_util.is_s3_path(source_path) and _file_util.is_s3_path(target_path):

        # compare credentials
        _check_aws_credentials(src_credentials, tgt_credentials, source_path)

        # intra s3 copy model
        _file_util.intra_s3_copy_model(source_path, target_path, is_dir, tgt_credentials)
    elif _file_util.is_local_path(source_path):

        _file_util.copy_from_local(source_path, target_path, is_dir = is_dir)

    else:
        # Remote source and not an S3-to-S3 copy: stage through a local
        # temporary directory.
        tmp_dir = _tempfile.mkdtemp(prefix = 'copy_predictive_object')
        try:
            # download to local first
            local_path = _os.path.join(tmp_dir, 'temp_po_file')
            if _file_util.is_s3_path(source_path):
                _file_util.download_from_s3(
                    source_path,
                    local_path,
                    is_dir=is_dir,
                    aws_credentials=src_credentials,
                    silent=False)
            elif _file_util.is_hdfs_path(source_path):
                # NOTE(review): is_dir is hard-coded to False here, unlike the
                # S3 branch which forwards the caller's is_dir -- confirm this
                # is intended by download_from_hdfs before changing it.
                _file_util.download_from_hdfs(source_path, local_path, is_dir = False)
            else:
                raise RuntimeError('Unsupported file system type: %s' % source_path)

            # upload from local to remote
            if _file_util.is_s3_path(target_path):
                _file_util.upload_to_s3(local_path, target_path, is_dir=is_dir,
                    aws_credentials=tgt_credentials, silent=False)
            elif _file_util.is_hdfs_path(target_path):
                _file_util.hdfs_mkdir(target_path)
                _file_util.upload_to_hdfs(local_path, target_path, force=True, silent=False)
            else:
                _file_util.upload_to_local(local_path, target_path, is_dir=is_dir, silent=False)

        finally:
            # Always drop the staging directory, even if a transfer failed.
            _shutil.rmtree(tmp_dir)


# Example no. 2
def subprocess_exe(exe, args, setup=None, teardown=None, out_log_prefix=None, environment_variables=None):
    """
    Wrapper function to execute an external program.
    This function is exception safe, and always catches
    the error.

    Parameters
    ----------
    exe : str
        The command to run
    args : list[str]
        Arguments to passed to the command
    setup : function
        Setup function to run before executing the command
    teardown : function
        Teardown function to run after executing the command.
        It is called even when setup or the command failed.
    out_log_prefix: str
        The path prefix to the saved log file.
        If set, the logs will be save to the following locations:
            <prefix>.stdout
            <prefix>.stderr
        and the return value will contain paths to the log files.
        The path can be local or hdfs or s3.
    environment_variables : dict[str, str]
        Extra environment variables for the command. They are merged on
        top of a copy of the current process environment. If None, the
        command inherits the current environment unchanged.

    Return
    ------
    out : dict
        A dictionary containing the following keys:

        success : bool
            True if the command succeeded
        return_code : int
            The return code of the command, or None if it was never run
        stderr : str
            Path to the stderr log of the process if out_log_prefix is set,
            otherwise the content of the stderr log. None if the command
            was never run.
        stdout : str
            Path to the stdout log of the process if out_log_prefix is set,
            otherwise the content of the stdout log. None if the command
            was never run.
        python_exception : Exception
            Python exception

        Two extra keys are only present on failure:
        '_save_log_exception' (collecting or cleaning up the logs failed)
        and '_tear_down_exception' (teardown raised).
    """
    import logging
    import os
    ret = {'success': True,
           'return_code': None,
           'stdout': None,
           'stderr': None,
           'python_exception': None}

    # Creates local log files. The names are pre-bound to None so that the
    # cleanup below can release whichever file was actually created, even
    # when the second creation fails.
    local_log_stdout = None
    local_log_stderr = None
    try:
        local_log_stdout = tempfile.NamedTemporaryFile(delete=False)
        local_log_stderr = tempfile.NamedTemporaryFile(delete=False)
    except Exception as e:
        ret['success'] = False
        ret['python_exception'] = e

    # Run setup
    try:
        if setup is not None:
            setup()
    except Exception as e:
        ret['success'] = False
        ret['python_exception'] = e

    # Executes the command
    if ret['success']:
        try:
            child_env = None
            if environment_variables is not None:
                # dict.update() returns None, so the merged environment has
                # to be built in two steps rather than chained.
                child_env = os.environ.copy()
                child_env.update(environment_variables)
            proc = subprocess.Popen([exe] + args,
                                    stdout=local_log_stdout,
                                    stderr=local_log_stderr,
                                    env=child_env)
            proc.communicate()
            ret['success'] = proc.returncode == 0
            ret['return_code'] = proc.returncode
        except Exception as e:
            ret['success'] = False
            ret['python_exception'] = e
        finally:
            try:
                # Close our handles before the logs are read or copied.
                local_log_stdout.close()
                local_log_stderr.close()
                if out_log_prefix is not None:
                    # persistent logfiles. When local log closed,
                    # they will be loaded to the corresponding hdfs or s3 path
                    file_log_stdout = out_log_prefix + '.stdout'
                    file_log_stderr = out_log_prefix + '.stderr'
                    # copy to target log path
                    file_util.copy_from_local(local_log_stdout.name, file_log_stdout)
                    file_util.copy_from_local(local_log_stderr.name, file_log_stderr)
                    ret['stdout'] = file_log_stdout
                    ret['stderr'] = file_log_stderr
                else:
                    with open(local_log_stdout.name) as log_file:
                        ret['stdout'] = log_file.read()
                    with open(local_log_stderr.name) as log_file:
                        ret['stderr'] = log_file.read()
            except Exception as e:
                ret['_save_log_exception'] = e
                logging.warning(str(e))

    # Always release the local temp logs, including when setup failed or
    # when collecting the logs raised; they were created with delete=False
    # and would otherwise be left behind on disk.
    for local_log in (local_log_stdout, local_log_stderr):
        if local_log is None:
            continue
        try:
            local_log.close()
            os.remove(local_log.name)
        except Exception as e:
            # Keep the first log-related error if one was already recorded.
            ret.setdefault('_save_log_exception', e)
            logging.warning(str(e))

    # Teardown
    if teardown is not None:
        try:
            teardown()
        except Exception as e:
            ret['_tear_down_exception'] = e
            logging.warning(str(e))

    return ret