import logging
import os

import shell_wrapper

# Assumption: HADOOP_HOME is defined elsewhere in the original module; it is
# read from the environment here so the snippet stands alone.
HADOOP_HOME = os.environ.get("HADOOP_HOME", "/opt/hadoop")


def put(*args, **kwargs):
    """Upload local paths to HDFS via `hadoop fs -put`; the last arg is the destination."""
    if len(args) < 2:
        raise ValueError(
            "put requires at least src_path and dest_path")
    cmd = '{}/bin/hadoop fs -put {}'.format(HADOOP_HOME, " ".join(args))
    if kwargs.get("print_cmd", False):
        logging.info(cmd)
    return shell_wrapper.shell_command(cmd=cmd)
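
# Hypothetical usage sketch (paths are illustrative; a live Hadoop install
# and the shell_wrapper module are assumed):
#
#     put("report.csv", "/user/etl/reports/", print_cmd=True)
#     put("a.csv", "b.csv", "/user/etl/inbox/")  # several sources, dir dest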


def getmerge(src, localdst, **kwargs):
    """Merge the files under an HDFS path into a single local file."""
    if not exists(src):
        logging.error("hdfs path [%s] not found, skip downloading.", src)
        return False
    cmd = '{}/bin/hadoop fs -getmerge {} {}'.format(HADOOP_HOME, src, localdst)
    if kwargs.get("print_cmd", False):
        logging.info(cmd)
    return shell_wrapper.shell_command(cmd=cmd)


def exists(hdfs_path, **kwargs):
    """Return a truthy value if the HDFS path exists (`hadoop fs -test -e`)."""
    try:
        cmd = '{}/bin/hadoop fs -test -e {}'.format(HADOOP_HOME, hdfs_path)
        if kwargs.get("print_cmd", False):
            logging.info(cmd)
        return shell_wrapper.shell_command(cmd=cmd)
    except Exception:
        # logging.exception already records the traceback; exc_info=True is redundant.
        logging.exception("exception occurred while testing hdfs path [%s]",
                          hdfs_path)
    return False


def rm(*hdfs_paths, **kwargs):
    """Delete HDFS files one at a time, skipping paths that do not exist."""
    for hdfs_path in hdfs_paths:
        if not exists(hdfs_path):
            continue
        cmd = '{}/bin/hadoop fs -rm {}'.format(HADOOP_HOME, hdfs_path)
        if kwargs.get("print_cmd", False):
            logging.info(cmd)
        if not shell_wrapper.shell_command(cmd=cmd):
            return False
    return True


def mkdir(*hdfs_paths, **kwargs):
    """Create HDFS directories (with parents), skipping ones that exist."""
    for hdfs_path in hdfs_paths:
        if exists(hdfs_path):
            logging.warning("hdfs path [%s] exists already, skip mkdir",
                            hdfs_path)
            continue
        cmd = '{}/bin/hadoop fs -mkdir -p {}'.format(HADOOP_HOME, hdfs_path)
        if kwargs.get("print_cmd", False):
            logging.info(cmd)
        if not shell_wrapper.shell_command(cmd=cmd):
            return False
    return True


def touchz(*hdfs_files, **kwargs):
    """Create empty HDFS files, skipping ones that already exist."""
    for hdfs_file in hdfs_files:
        if exists(hdfs_file):
            logging.warning("hdfs file [%s] exists already, skip touching",
                            hdfs_file)
            continue
        cmd = '{}/bin/hadoop fs -touchz {}'.format(HADOOP_HOME, hdfs_file)
        if kwargs.get("print_cmd", False):
            logging.info(cmd)
        if not shell_wrapper.shell_command(cmd=cmd):
            return False
    return True
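
# Hypothetical usage: both helpers above are variadic, skip paths that
# already exist, and return False on the first failed shell command:
#
#     mkdir("/user/etl/stage", "/user/etl/output", print_cmd=True)
#     touchz("/user/etl/stage/_SUCCESS")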


def rmr(*hdfs_paths, **kwargs):
    """Recursively delete HDFS paths, refusing ones that look too shallow."""
    for hdfs_path in hdfs_paths:
        # Collapse duplicate slashes, strip any trailing slash, then count
        # the remaining '/' as the path depth; shallow paths are never
        # deleted (default safe_depth is 4).
        depth = hdfs_path.replace("///", "/").replace(
            "//", "/").rstrip("/").count("/")
        if depth < kwargs.get("safe_depth", 4):
            logging.warning(
                "hdfs path [%s] failed depth check, skip deleting.", hdfs_path)
            return False
        if not exists(hdfs_path):
            logging.warning("hdfs path [%s] not found, skip deleting.",
                            hdfs_path)
            continue
        cmd = '{}/bin/hadoop fs -rm -r {}'.format(HADOOP_HOME, hdfs_path)
        if kwargs.get("print_cmd", False):
            logging.info(cmd)
        if not shell_wrapper.shell_command(cmd=cmd):
            return False
    return True
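
# The depth guard in rmr() is easy to misread; this self-contained sketch
# reproduces the same arithmetic on two hypothetical paths (no HDFS needed).
def _demo_rmr_depth_guard():
    for path in ("/user/team/project/data/part-0", "/user/team"):
        depth = path.replace("///", "/").replace("//", "/").rstrip("/").count("/")
        # depth 5 passes the default safe_depth of 4; depth 2 does not
        print(path, depth, "delete" if depth >= 4 else "skip")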


def mv(*args, **kwargs):
    """Move/rename HDFS paths via `hadoop fs -mv`."""
    if len(args) < 2:
        raise ValueError(
            "mv requires at least src_path and dest_path")

    cmd = "{}/bin/hadoop fs -mv {}".format(HADOOP_HOME, " ".join(args))
    if kwargs.get("print_cmd", False):
        logging.info(cmd)

    if len(args) > 2:
        # `hadoop fs -mv` also accepts multiple sources, in which case the
        # destination must be an existing directory; stat() is a sibling
        # helper of this module (not shown here).
        fstat = stat(args[-1])
        if fstat is None or fstat.is_file:
            logging.error(
                "destination [%s] needs to be an existing directory.",
                args[-1])
            return False
    return shell_wrapper.shell_command(cmd=cmd)
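
# Hypothetical usage: two args perform a plain rename; with more than two,
# the last arg must name an existing directory (checked via stat() above):
#
#     mv("/tmp/part-0", "/data/part-0")
#     mv("/tmp/part-0", "/tmp/part-1", "/data/archive")
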
Example #9
def run(self, **kwargs):
    cmd = self.build_cmd()
    if kwargs.get("print_cmd", False):
        logging.info(cmd)
    return shell_wrapper.shell_command(cmd=cmd, print_info=True,
                                       print_error=True)