コード例 #1
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def cp(fromPath, toPath, force=False):
     """
     Runs 'hdfs dfs -cp fromPath toPath' to copy a file.

     Parameters:
         fromPath : source HDFS path
         toPath   : destination HDFS path
         force    : if True, pass -f to overwrite the destination

     Returns:
         None
     """
     # Build the option list the same way put() and get() do, rather than
     # inserting '-f' at a hard-coded index into the command list.
     options = ['-f'] if force else []
     sh(['hdfs', 'dfs', '-cp'] + options + [fromPath, toPath])
コード例 #2
0
ファイル: test_util.py プロジェクト: nomoa/analytics-refinery
    def test_sh(self):
        """sh() should run commands given as either an argv list or a string."""
        # List form: each element is a separate argv item.
        list_output = sh(['/bin/echo', 'test-list'])
        self.assertEqual(list_output, 'test-list')

        # String form: the command line is given as a single string.
        string_output = sh('/bin/echo test-string')
        self.assertEqual(string_output, 'test-string')
コード例 #3
0
    def test_sh(self):
        """sh() should accept both list-style and string-style commands."""
        # argv-list invocation
        self.assertEqual(sh(['/bin/echo', 'test-list']), 'test-list')

        # single-string invocation
        self.assertEqual(sh('/bin/echo test-string'), 'test-string')
コード例 #4
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def cat(path):
     """
     Runs hdfs dfs -cat path and returns the contents of the file.
     Be careful with file size, it will be returned as an in-memory string.
     """
     # sh() returns bytes; decode to text before handing back to callers.
     raw_contents = sh(['hdfs', 'dfs', '-cat', path])
     return raw_contents.decode('utf-8')
コード例 #5
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def get_modified_datetime(path):
     """
     Runs 'hdfs dfs -stat' and returns the modified datetime for the given path.
     """
     # -stat prints '<date> <time>'; join them into an ISO 8601 UTC timestamp
     # that dateutil's parser understands.
     stat_output = sh(['hdfs', 'dfs', '-stat', path]).decode('utf-8')
     date_part, time_part = stat_output.strip().split()
     return parser.parse(date_part + 'T' + time_part + 'Z')
コード例 #6
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def rmdir(paths):
        """
        Runs hdfs dfs -rmdir on paths.

        Parameters:
            paths : list of paths, or a single whitespace-separated string
        """
        # Accept a whitespace-separated string as well as a list of paths.
        path_list = paths.split() if isinstance(paths, str) else paths
        return sh(['hdfs', 'dfs', '-rmdir'] + path_list)
コード例 #7
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def touchz(paths):
        """
        Runs hdfs dfs -touchz on paths, creating zero-length files.

        (The previous docstring mentioned 'skipping trash', which applies
        to rm(), not touchz.)

        Parameters:
            paths : list of paths, or a single whitespace-separated string

        Returns:
            The output of the sh() call.
        """
        if isinstance(paths, str):
            paths = paths.split()

        return sh(['hdfs', 'dfs', '-touchz'] + paths)
コード例 #8
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def mkdir(paths, create_parent=True):
        """
        Runs hdfs dfs -mkdir on paths, passing -p when create_parent is True.

        Parameters:
            paths         : list of paths, or a single whitespace-separated string
            create_parent : if True (default), create missing parent directories
        """
        if isinstance(paths, str):
            paths = paths.split()

        command = ['hdfs', 'dfs', '-mkdir']
        if create_parent:
            command.append('-p')
        return sh(command + paths)
コード例 #9
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def rm(paths, recurse=True, skip_trash=True):
        """
        Runs hdfs dfs -rm on paths, optionally recursing and skipping trash.

        Parameters:
            paths      : list of paths, or a single whitespace-separated string
            recurse    : if True (default), pass -R to remove directories
            skip_trash : if True (default), pass -skipTrash to delete permanently

        Returns:
            The output of the sh() call.
        """
        if isinstance(paths, str):
            paths = paths.split()

        options = []
        if recurse:
            options.append('-R')
        if skip_trash:
            options.append('-skipTrash')
        return sh(['hdfs', 'dfs', '-rm'] + options + paths)
コード例 #10
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def mv(from_paths, to_paths, inParent=True):
        """
        Runs hdfs dfs -mv fromPath toPath for each pair of from/to paths.

        If inParent is True (default), the parent folder of each of the
        to_paths provided is used as the destination. Set inParent to
        False if the file/folder moved is also renamed.

        Parameters:
            from_paths : list of source paths, or a whitespace-separated string
            to_paths   : list of destination paths, or a whitespace-separated string
            inParent   : if True, move into the parent directory of each to_path

        Raises:
            ValueError: if from_paths and to_paths have different lengths.
        """
        if isinstance(from_paths, str):
            from_paths = from_paths.split()

        if isinstance(to_paths, str):
            to_paths = to_paths.split()

        if len(from_paths) != len(to_paths):
            # ValueError is more precise than the bare Exception previously
            # raised, and remains backward-compatible for callers catching
            # Exception.
            raise ValueError('from_paths and to_paths size don\'t match in hdfs mv function')

        for from_path, to_path in zip(from_paths, to_paths):
            # Make sure the destination parent directory exists before moving.
            to_parent = '/'.join(to_path.split('/')[:-1])
            if not Hdfs.ls(to_parent, include_children=False):
                Hdfs.mkdir(to_parent)
            destination = to_parent if inParent else to_path
            sh(['hdfs', 'dfs', '-mv', from_path, destination])
コード例 #11
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
    def ls(paths, include_children=True, with_details=False):
        """
        Runs hdfs dfs -ls on paths.

        Parameters:
            paths            : List or string paths to files to ls.  Can include shell globs.
            include_children : If include_children is False, the -d flag will
                               be given to hdfs dfs -ls.
            with_details     : If True, each entry is returned as a dict of the
                               parsed ls output columns instead of just the path.
        Returns:
            Array of paths matching the ls-ed path, or an array of dicts of
            parsed ls columns if with_details is True.
        """

        if isinstance(paths, str):
            paths = paths.split()

        options = []
        if not include_children:
            options.append('-d')

        # ls output is bytes; drop the leading 'Found N items' summary line
        # and split each remaining line into its whitespace-separated columns.
        split_lines = [
            line.split() for line in sh(
                ['hdfs', 'dfs', '-ls'] + options + paths,
                # Not checking return code here so we don't
                # fail when paths do not exist.
                check_return_code=False
            ).splitlines() if not line.startswith(b'Found ')
        ]

        if with_details:
            return [
                {
                    # First column is e.g. b'-rw-r--r--' for files,
                    # b'drwxr-xr-x' for directories.
                    'file_type': 'f' if parts[0].decode('utf-8')[0] == '-' else 'd',
                    'permission': parts[0][1:],
                    'replication': parts[1],
                    'owner': parts[2],
                    'group': parts[3],
                    'file_size': parts[4],
                    'modification_date': parts[5],
                    'modification_time': parts[6],
                    'path': parts[7]
                } for parts in split_lines
            ]
        else:
            return [parts[-1] for parts in split_lines]
コード例 #12
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def dir_bytes_size(path):
     """
     Returns the size in bytes of a hdfs path
     """
     # 'hdfs dfs -du -s' prints '<bytes> ...'; the first token is the size.
     du_output = sh(['hdfs', 'dfs', '-du', '-s', path])
     return int(du_output.split()[0])
コード例 #13
0
 def test_sh_pipe(self):
     """sh() should support shell pipelines when given a string command."""
     piped_output = sh('/bin/echo hi_there | /usr/bin/env sed -e \'s@_there@_you@\'')
     self.assertEqual(piped_output, 'hi_you')
コード例 #14
0
ファイル: hive.py プロジェクト: wikimedia/analytics-refinery
 def _command(self, args, check_return_code=True):
     """Runs the `hive` from the command line, passing in the given args, and
        returning stdout.
     """
     # Prepend the configured hive invocation to the caller-supplied args.
     return sh(self.hivecmd + args, check_return_code)
コード例 #15
0
ファイル: test_util.py プロジェクト: nomoa/analytics-refinery
 def test_sh_pipe(self):
     """A string command containing a pipe should be run through the shell."""
     result = sh('/bin/echo hi_there | /usr/bin/env sed -e \'s@_there@_you@\'')
     self.assertEqual(result, 'hi_you')
コード例 #16
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def put(local_path, hdfs_path, force=False):
     """
     Runs 'hdfs dfs -put local_path hdfs_path' to copy a local file over to hdfs.
     """
     command = ['hdfs', 'dfs', '-put']
     if force:
         # -f overwrites the destination if it already exists.
         command.append('-f')
     sh(command + [local_path, hdfs_path])
コード例 #17
0
ファイル: hdfs.py プロジェクト: wikimedia/analytics-refinery
 def get(hdfs_path, local_path, force=False):
     """
     Runs 'hdfs dfs -get hdfs_path local_path' to copy an HDFS file to the
     local filesystem.

     (The previous docstring incorrectly described the put direction.)

     Parameters:
         hdfs_path  : source path in HDFS
         local_path : destination path on the local filesystem
         force      : if True, pass -f to overwrite the local destination
     """
     options = ['-f'] if force else []
     sh(['hdfs', 'dfs', '-get'] + options + [hdfs_path, local_path])