Exemple #1
0
 def check(self, args):
     """
     Check that a local directory tree matches an HDFS directory tree.

     Args:
         :args: Mapping with the keys "root" (local directory) and "hdfs_root" (HDFS directory). Both values are stored on the instance as self.root and self.hdfs_root.

     Returns:
         False if either root is not a directory, otherwise the result of self.walk() on the two top-level listings.

     Raises:
         :KeyError: if "root" or "hdfs_root" is missing from args.
     """
     # NOTE(review): `path` and `hdfs` are HDFS helpers imported elsewhere in
     # the file (presumably pydoop.hdfs) - confirm against the file's imports.
     self.root = args["root"]
     self.hdfs_root = args["hdfs_root"]
     print("checking: %s" % self.root)
     print("checking hdfs: %s" % self.hdfs_root)
     # Test the local root first: it is a cheap filesystem call, so an invalid
     # local path is rejected without a round trip to HDFS.
     if not os.path.isdir(self.root):
         return False
     if not path.isdir(self.hdfs_root):
         return False
     return self.walk(self.root, os.listdir(self.root), self.hdfs_root,
                      hdfs.ls(self.hdfs_root))
Exemple #2
0
def isdir(hdfs_path, project=None):
    """
    Return True if path refers to a directory.

    Args:
        :hdfs_path: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
        :project: If this value is not specified, it will get the path to your project. If you need the path to another project, you can specify the name of the project as a string.

    Returns:
        True if path refers to a directory.

    Raises: IOError
    """
    # Identity test: None is a singleton, and `is` cannot be fooled by a
    # custom __eq__ on the argument.
    if project is None:
        project = project_name()
    # Resolve a project-relative path before asking HDFS about it.
    hdfs_path = _expand_path(hdfs_path, project)
    return path.isdir(hdfs_path)
Exemple #3
0
    def walk(self, parent_path, file_list, hdfs_parent_path, hdfs_file_list):
        """
        Recursively compare a local directory listing with an HDFS listing.

        Entries are paired after sorting both listings (directories first,
        then by base name). Each pair must have the same base name;
        directories are compared recursively and regular files by size.

        Args:
            :parent_path: local directory whose entries are in file_list.
            :file_list: entry names of parent_path (as from os.listdir).
            :hdfs_parent_path: HDFS directory whose entries are in hdfs_file_list.
            :hdfs_file_list: entries of hdfs_parent_path (as from hdfs.ls).

        Returns:
            True if the two trees match, False at the first difference found.
        """
        # NOTE(review): `path` and `hdfs` are HDFS helpers imported elsewhere
        # in the file (presumably pydoop.hdfs) - confirm against the imports.
        print("%s %s" % (parent_path, hdfs_parent_path))
        if not file_list and not hdfs_file_list:
            # Two empty directories match only when they share a name.
            return (os.path.basename(parent_path) ==
                    path.basename(hdfs_parent_path))
        if len(file_list) != len(hdfs_file_list):
            print("No match: number of files in dirs")
            return False

        # Directories first (False sorts before True), then by base name so
        # that the pairing does not depend on the arbitrary order in which the
        # two filesystems list their entries. sorted() leaves the caller's
        # lists untouched.
        local_entries = sorted(
            file_list,
            key=lambda f: (os.path.isfile(os.path.join(parent_path, f)),
                           os.path.basename(f)))
        hdfs_entries = sorted(
            hdfs_file_list,
            key=lambda f: (path.isfile(path.join(hdfs_parent_path, f)),
                           path.basename(f)))

        for sub_path, hdfs_sub_path in zip(local_entries, hdfs_entries):
            full_path = os.path.join(parent_path, sub_path)
            hdfs_full_path = path.join(hdfs_parent_path, hdfs_sub_path)

            if (os.path.basename(sub_path) !=
                    path.basename(hdfs_sub_path)):
                print("No match: %s and %s" % (sub_path, hdfs_sub_path))
                return False

            if os.path.isdir(full_path):
                if not path.isdir(hdfs_full_path):
                    print("No match on directory: %s and %s" %
                          (full_path, hdfs_full_path))
                    return False
                # Only a mismatch ends the walk early; a matching
                # subdirectory must not stop the remaining siblings from
                # being compared.
                if not self.walk(full_path, os.listdir(full_path),
                                 hdfs_full_path, hdfs.ls(hdfs_full_path)):
                    return False
            elif os.path.isfile(full_path):
                if path.getsize(hdfs_full_path) != os.path.getsize(full_path):
                    return False

        return True