class EffectiveUserTest(MiniClusterTestBase):
    """Exercise ``touchz`` and ``stat`` through clients with different users.

    ``setUp`` builds two clients against ``self.cluster``: one with the
    default user and one with an explicit ``effective_user``.
    """

    # Expected prefix of the error raised when the restricted client touches
    # a file.
    ERR_MSG_TOUCH = "org.apache.hadoop.security.AccessControlException\nPermission denied: user=__foobar"
    # Expected full error message when stat-ing a path that does not exist.
    ERR_MSG_STAT = "`/foobar2': No such file or directory"

    VALID_FILE = '/foobar'
    INVALID_FILE = '/foobar2'

    def setUp(self):
        """Create the default client and the client with a custom user."""
        self.custom_client = Client(self.cluster.host, self.cluster.port)
        # NOTE(review): this literal looks redacted -- ERR_MSG_TOUCH expects
        # "user=__foobar"; confirm the intended effective user.
        self.custom_foobar_client = Client(host=self.cluster.host,
                                           port=self.cluster.port,
                                           effective_user='******')

    def test_touch(self):
        """Check touchz/stat succeed on the valid path and fail as expected.

        The failing calls are wrapped in ``assertRaises`` so the test fails
        when no exception is raised at all, instead of passing vacuously.
        """
        # tuple() drains the result so the touch is actually performed.
        print(tuple(self.custom_client.touchz([self.VALID_FILE])))

        with self.assertRaises(Exception) as touch_failure:
            tuple(self.custom_foobar_client.touchz([self.INVALID_FILE]))
        self.assertTrue(
            touch_failure.exception.args[0].startswith(self.ERR_MSG_TOUCH))

        self.custom_client.stat([self.VALID_FILE])

        with self.assertRaises(Exception) as stat_failure:
            self.custom_client.stat([self.INVALID_FILE])
        self.assertEqual(stat_failure.exception.args[0], self.ERR_MSG_STAT)
def signature(self):
    """Return a token describing the current state of ``self._partial``.

    A new ``Client`` is built from ``self._host``, ``self._port`` and
    ``self._user`` (with ``use_trash=False``) and used to stat the path.

    Returns:
        ``"modification_time:<value>"`` when the stat result reports
        ``file_type == 'f'``; otherwise the reported ``file_type`` itself.
    """
    hdfs = Client(self._host, self._port,
                  effective_user=self._user, use_trash=False)
    info = hdfs.stat([self._partial])
    kind = info['file_type']
    # Anything that is not reported as 'f' is identified by its type alone.
    if kind != 'f':
        return kind
    return "modification_time:{}".format(info['modification_time'])
def signature(self):
    """Return a token describing the current state of ``self._partial``.

    A new ``Client`` is built from ``self._host``, ``self._port`` and
    ``self._user`` (with ``use_trash=False``) and used to stat the path.

    Returns:
        ``"modification_time:<value>"`` when the stat result reports
        ``file_type == 'f'``; otherwise the reported ``file_type`` itself.
    """
    hdfs = Client(self._host, self._port,
                  effective_user=self._user, use_trash=False)
    info = hdfs.stat([self._partial])
    kind = info['file_type']
    # Anything that is not reported as 'f' is identified by its type alone.
    if kind != 'f':
        return kind
    return "modification_time:{}".format(info['modification_time'])
class HDFSTextLoader(Unit, TriviallyDistributable):
    """Read a text file through ``Client`` in fixed-size chunks of lines.

    Keyword arguments:
        file: path of the file to read (required).
        chunk: number of lines delivered per ``run`` call (default 1000).
        Every other keyword argument is forwarded to ``Client``; all
        keyword arguments are also passed to the base constructor.

    Attributes:
        output: list of ``chunk`` strings, overwritten in place by ``run``.
        finished: ``Bool`` flag set once the file has been exhausted.
    """

    def __init__(self, workflow, **kwargs):
        super(HDFSTextLoader, self).__init__(workflow, **kwargs)
        self.file_name = kwargs["file"]
        self.chunk_lines_number = kwargs.get("chunk", 1000)
        # The client must not receive the loader-specific options.
        client_kwargs = dict(kwargs)
        client_kwargs.pop("file")
        client_kwargs.pop("chunk", None)
        self.hdfs_client = Client(**client_kwargs)
        self.output = [""] * self.chunk_lines_number
        self.finished = Bool()

    def initialize(self):
        """Log the stat result for the file and open the line generator."""
        self.debug("Opened %s", self.hdfs_client.stat([self.file_name]))
        self._generator = self.hdfs_client.text([self.file_name])

    def run(self):
        """Fill ``output`` with the next chunk of lines.

        When the generator is exhausted, ``finished`` is set and the slots
        that could not be filled are reset to ``""``. Without that reset the
        tail of ``output`` would still hold lines from the previous chunk.
        """
        assert not self.finished
        filled = 0
        try:
            while filled < self.chunk_lines_number:
                self.output[filled] = next(self._generator)
                filled += 1
        except StopIteration:
            # Blank the unfilled tail in place so the list object that other
            # code may hold a reference to keeps its identity and length.
            for stale in range(filled, self.chunk_lines_number):
                self.output[stale] = ""
            self.finished <<= True
class HDFSTextLoader(Unit, TriviallyDistributable):
    """Read a text file through ``Client`` in fixed-size chunks of lines.

    Keyword arguments:
        file: path of the file to read (required).
        chunk: number of lines delivered per ``run`` call (default 1000).
        Every other keyword argument is forwarded to ``Client``; all
        keyword arguments are also passed to the base constructor.

    Attributes:
        output: list of ``chunk`` strings, overwritten in place by ``run``.
        finished: ``Bool`` flag set once the file has been exhausted.
    """

    def __init__(self, workflow, **kwargs):
        super(HDFSTextLoader, self).__init__(workflow, **kwargs)
        self.file_name = kwargs["file"]
        self.chunk_lines_number = kwargs.get("chunk", 1000)
        # The client must not receive the loader-specific options.
        client_kwargs = dict(kwargs)
        client_kwargs.pop("file")
        client_kwargs.pop("chunk", None)
        self.hdfs_client = Client(**client_kwargs)
        self.output = [""] * self.chunk_lines_number
        self.finished = Bool()

    def initialize(self):
        """Log the stat result for the file and open the line generator."""
        self.debug("Opened %s", self.hdfs_client.stat([self.file_name]))
        self._generator = self.hdfs_client.text([self.file_name])

    def run(self):
        """Fill ``output`` with the next chunk of lines.

        When the generator is exhausted, ``finished`` is set and the slots
        that could not be filled are reset to ``""``. Without that reset the
        tail of ``output`` would still hold lines from the previous chunk.
        """
        assert not self.finished
        filled = 0
        try:
            while filled < self.chunk_lines_number:
                self.output[filled] = next(self._generator)
                filled += 1
        except StopIteration:
            # Blank the unfilled tail in place so the list object that other
            # code may hold a reference to keeps its identity and length.
            for stale in range(filled, self.chunk_lines_number):
                self.output[stale] = ""
            self.finished <<= True