def check(self, args):
    """
    Compare a local directory tree against an HDFS directory tree.

    Stores ``args["root"]`` on ``self.root`` and ``args["hdfs_root"]`` on
    ``self.hdfs_root``, prints both, and then delegates the recursive
    comparison to ``self.walk``.

    Args:
        :args: Mapping with the keys ``"root"`` (local directory) and
               ``"hdfs_root"`` (HDFS directory).

    Returns:
        False if either root is not a directory, otherwise the result of
        ``self.walk`` on the two roots.

    Raises:
        KeyError if ``"root"`` or ``"hdfs_root"`` is missing from ``args``.
    """
    self.root = args["root"]
    self.hdfs_root = args["hdfs_root"]
    print("checking: %s" % self.root)
    print("checking hdfs: %s" % self.hdfs_root)

    # Both roots must be directories before any listing is attempted.
    # The HDFS side is tested first, as before; `and` short-circuits so the
    # local test only runs when the HDFS root is a directory.
    # NOTE(review): `path` is a module-level name defined outside this block,
    # presumably an HDFS path helper -- confirm.
    if not (path.isdir(self.hdfs_root) and os.path.isdir(self.root)):
        return False

    return self.walk(
        self.root,
        os.listdir(self.root),
        self.hdfs_root,
        hdfs.ls(self.hdfs_root),
    )
def isdir(hdfs_path, project=None):
    """
    Return True if path refers to a directory.

    Args:
        :hdfs_path: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
        :project: If this value is not specified, it will get the path to your project. If you need the path to another project, you can specify the name of the project as a string.

    Returns:
        True if path refers to a directory.

    Raises: IOError
    """
    # Identity test: None is the "not supplied" marker for the project name.
    if project is None:
        project = project_name()
    hdfs_path = _expand_path(hdfs_path, project)
    return path.isdir(hdfs_path)
def walk(self, parent_path, file_list, hdfs_parent_path, hdfs_file_list):
    """
    Recursively compare a local directory listing with an HDFS listing.

    Two directories match when they hold the same number of entries, the
    entries have pairwise equal base names, every local sub-directory has a
    matching HDFS sub-directory whose contents also match, and every local
    regular file has the same size as its HDFS counterpart.

    Args:
        :parent_path: Local directory being compared.
        :file_list: Entries of ``parent_path`` (as from ``os.listdir``).
        :hdfs_parent_path: HDFS directory being compared.
        :hdfs_file_list: Entries of ``hdfs_parent_path`` (as from ``hdfs.ls``).

    Returns:
        True if the two trees match, False otherwise.
    """
    print("%s %s" % (parent_path, hdfs_parent_path))

    if not file_list and not hdfs_file_list:
        # Two empty directories match only when their own names agree.
        return os.path.basename(parent_path) == path.basename(hdfs_parent_path)

    if len(file_list) != len(hdfs_file_list):
        print("No match: number of files in dirs")
        return False

    # Order both listings directories-first, then by base name. The name is
    # part of the key so that the pairing below does not depend on the order
    # in which each filesystem happened to return its entries. Sorted copies
    # are used so the caller's lists are left untouched.
    local_entries = sorted(
        file_list,
        key=lambda f: (
            os.path.isfile(os.path.join(parent_path, f)),
            os.path.basename(f),
        ),
    )
    hdfs_entries = sorted(
        hdfs_file_list,
        key=lambda f: (
            path.isfile(path.join(hdfs_parent_path, f)),
            path.basename(f),
        ),
    )

    for sub_path, hdfs_sub_path in zip(local_entries, hdfs_entries):
        full_path = os.path.join(parent_path, sub_path)
        hdfs_full_path = path.join(hdfs_parent_path, hdfs_sub_path)

        if os.path.basename(sub_path) != path.basename(hdfs_sub_path):
            print("No match: %s and %s" % (sub_path, hdfs_sub_path))
            return False

        if os.path.isdir(full_path):
            if not path.isdir(hdfs_full_path):
                print("No match on directory: %s and %s" %
                      (full_path, hdfs_full_path))
                return False
            # Only a mismatch ends the comparison; on a match keep going so
            # the remaining siblings of this directory are checked as well.
            if not self.walk(full_path, os.listdir(full_path),
                             hdfs_full_path, hdfs.ls(hdfs_full_path)):
                return False
        elif os.path.isfile(full_path):
            if path.getsize(hdfs_full_path) != os.path.getsize(full_path):
                return False

    return True