def _pick_spark_tmp_dir(self):
    """Pick the directory for this job's Spark temp files.

    Honors the *spark_tmp_dir* opt when set; otherwise uses a local
    temp dir for local masters and an HDFS path for everything else.
    """
    opt_dir = self._opts['spark_tmp_dir']
    if opt_dir:
        return self.fs.join(opt_dir, self._job_key)

    master = self._spark_master() or 'local'
    # 'local' prefix also covers local-cluster masters, which still
    # need a local temp dir
    if master.startswith('local'):
        # add "-spark" so we don't collide with default local temp dir
        return os.path.join(gettempdir(), self._job_key + '-spark')

    # use HDFS (same default as HadoopJobRunner)
    return posixpath.join(
        fully_qualify_hdfs_path('tmp/mrjob'), self._job_key)
def _pick_spark_tmp_dir(self):
    """Return the temp-file location for this job's Spark run.

    The *spark_tmp_dir* opt wins when set; failing that, local
    masters get a local temp dir and all others get an HDFS path.
    """
    if self._opts['spark_tmp_dir']:
        return self.fs.join(self._opts['spark_tmp_dir'], self._job_key)
    elif (self._spark_master() or 'local').startswith('local'):
        # including local-cluster; the "-spark" suffix keeps us from
        # colliding with the default local temp dir
        return os.path.join(gettempdir(), self._job_key + '-spark')
    else:
        # use HDFS (same default as HadoopJobRunner)
        return posixpath.join(
            fully_qualify_hdfs_path('tmp/mrjob'), self._job_key)
def _pick_spark_tmp_dir(self):
    """Pick the directory for this job's Spark temp files.

    A user-supplied *spark_tmp_dir* is joined with the job key using
    URI- or OS-style joining as appropriate; otherwise local masters
    get a local temp dir and everything else goes to HDFS.
    """
    opt_dir = self._opts['spark_tmp_dir']
    if opt_dir:
        # URIs must use forward slashes regardless of platform
        join = posixpath.join if is_uri(opt_dir) else os.path.join
        return join(opt_dir, self._job_key)

    if self._spark_master_is_local():
        # the "-spark" suffix keeps us from colliding with the
        # default local temp dir
        return os.path.join(gettempdir(), self._job_key + '-spark')

    # use HDFS (same default as HadoopJobRunner)
    return posixpath.join(
        fully_qualify_hdfs_path('tmp/mrjob'), self._job_key)
def test_hdfs_uri(self):
    """An hdfs:// URI passes through unchanged."""
    uri = 'hdfs://host/path/'
    self.assertEqual(fully_qualify_hdfs_path(uri), uri)
def test_absolute_path(self):
    """An absolute path gains the hdfs:// scheme."""
    result = fully_qualify_hdfs_path('/path/to/cheese')
    self.assertEqual(result, 'hdfs:///path/to/cheese')
def test_relative_path(self):
    """A relative path is rooted in the current user's HDFS home dir."""
    with patch('getpass.getuser', return_value='dave'):
        self.assertEqual(
            fully_qualify_hdfs_path('path/to/chocolate'),
            'hdfs:///user/dave/path/to/chocolate')
def test_empty(self):
    """An empty path maps to the current user's HDFS home dir."""
    with patch('getpass.getuser', return_value='dave'):
        self.assertEqual(
            fully_qualify_hdfs_path(''), 'hdfs:///user/dave/')
def test_other_uri(self):
    """A URI with a non-hdfs scheme passes through unchanged."""
    uri = 'foo://bar/baz'
    self.assertEqual(fully_qualify_hdfs_path(uri), uri)
def test_s3n_uri(self):
    """An s3n:// URI passes through unchanged."""
    uri = 's3n://bucket/oh/noes'
    self.assertEqual(fully_qualify_hdfs_path(uri), uri)
def test_relative_path(self):
    """A relative path is rooted in the current user's HDFS home dir."""
    # NOTE(review): this repeats the name of an earlier
    # test_relative_path; if both live in the same TestCase this
    # definition silently shadows the first — confirm they belong to
    # different classes.
    with patch('getpass.getuser') as getuser:
        getuser.return_value = 'dave'
        self.assertEqual(fully_qualify_hdfs_path('path/to/chocolate'),
                         'hdfs:///user/dave/path/to/chocolate')
def test_empty(self):
    """An empty path maps to the current user's HDFS home dir."""
    # NOTE(review): this repeats the name of an earlier test_empty; if
    # both live in the same TestCase this definition silently shadows
    # the first — confirm they belong to different classes.
    with patch('getpass.getuser') as getuser:
        getuser.return_value = 'dave'
        self.assertEqual(fully_qualify_hdfs_path(''),
                         'hdfs:///user/dave/')