def test_tmppath_not_configured(self):
    """Check hdfs.tmppath() results for plain, hdfs:// and file:// targets.

    Every call passes include_unix_username=False, and every expected
    pattern places the result under /tmp.
    """
    # Given: several target paths to test, each paired with the pattern
    # its temporary path is expected to match
    cases = [
        ("/dir1/dir2/file", r"^/tmp/dir1/dir2/file-luigitemp-\d+"),
        # it would be better to see hdfs:///path instead of hdfs:/path,
        # but single slash also works well
        ("hdfs:///dir1/dir2/file",
         r"^hdfs:/tmp/dir1/dir2/file-luigitemp-\d+"),
        ("hdfs://somehost/dir1/dir2/file",
         r"^hdfs://somehost/tmp/dir1/dir2/file-luigitemp-\d+"),
        ("file:///dir1/dir2/file",
         r"^file:///tmp/dir1/dir2/file-luigitemp-\d+"),
        ("/tmp/dir/file", r"^/tmp/dir/file-luigitemp-\d+"),
        # known issue with duplicated "tmp" if schema is present
        ("file:///tmp/dir/file",
         r"^file:///tmp/tmp/dir/file-luigitemp-\d+"),
        # known issue with duplicated "tmp" if schema is present
        ("hdfs://somehost/tmp/dir/file",
         r"^hdfs://somehost/tmp/tmp/dir/file-luigitemp-\d+"),
        (None, r"^/tmp/luigitemp-\d+"),
        # a path that merely starts with "/tmp" is still prefixed
        ("/tmpdir/file", r"/tmp/tmpdir/file"),
    ]

    # When: I create a temporary path for targets (all paths are
    # generated before anything is asserted)
    results = [
        hdfs.tmppath(path, include_unix_username=False)
        for path, _ in cases
    ]

    # Then: I should get correct results relative to Luigi temporary
    # directory. Patterns are raw strings so that "\d" reaches the regex
    # engine without relying on an invalid string escape.
    for result, (_, pattern) in zip(results, cases):
        self.assertRegexpMatches(result, pattern)
def test_tmppath_not_configured(self):
    """Check hdfs.tmppath() results for plain, hdfs:// and file:// targets.

    Every call passes include_unix_username=False, and every expected
    pattern places the result under /tmp.
    """
    # Given: several target paths to test, each paired with the pattern
    # its temporary path is expected to match
    cases = [
        ("/dir1/dir2/file", r"^/tmp/dir1/dir2/file-luigitemp-\d+"),
        # it would be better to see hdfs:///path instead of hdfs:/path,
        # but single slash also works well
        ("hdfs:///dir1/dir2/file",
         r"^hdfs:/tmp/dir1/dir2/file-luigitemp-\d+"),
        ("hdfs://somehost/dir1/dir2/file",
         r"^hdfs://somehost/tmp/dir1/dir2/file-luigitemp-\d+"),
        ("file:///dir1/dir2/file",
         r"^file:///tmp/dir1/dir2/file-luigitemp-\d+"),
        ("/tmp/dir/file", r"^/tmp/dir/file-luigitemp-\d+"),
        # known issue with duplicated "tmp" if schema is present
        ("file:///tmp/dir/file",
         r"^file:///tmp/tmp/dir/file-luigitemp-\d+"),
        # known issue with duplicated "tmp" if schema is present
        ("hdfs://somehost/tmp/dir/file",
         r"^hdfs://somehost/tmp/tmp/dir/file-luigitemp-\d+"),
        (None, r"^/tmp/luigitemp-\d+"),
        # a path that merely starts with "/tmp" is still prefixed
        ("/tmpdir/file", r"/tmp/tmpdir/file"),
    ]

    # When: I create a temporary path for targets (all paths are
    # generated before anything is asserted)
    results = [
        hdfs.tmppath(path, include_unix_username=False)
        for path, _ in cases
    ]

    # Then: I should get correct results relative to Luigi temporary
    # directory. Patterns are raw strings so that "\d" reaches the regex
    # engine without relying on an invalid string escape.
    for result, (_, pattern) in zip(results, cases):
        self.assertRegexpMatches(result, pattern)
def test_tmppath_username(self):
    """Check hdfs.tmppath() output when include_unix_username=True.

    The expected pattern has one extra path component between /tmp and
    the target path.
    """
    result = hdfs.tmppath('/path/to/stuff', include_unix_username=True)
    # Raw string: "\d" must reach the regex engine as-is rather than rely
    # on an invalid string escape.
    # NOTE(review): the extra component is matched with [a-z0-9_]+ only;
    # confirm user names with other characters cannot occur here.
    self.assertRegexpMatches(
        result, r"^/tmp/[a-z0-9_]+/path/to/stuff-luigitemp-\d+")
def copy_to_hdfs(self):
    """Copy this task's output to a temporary path and remember that path.

    The source is ``self.output().path``; the destination is whatever
    ``luigi.hdfs.tmppath()`` returns when called without arguments. After
    the copy call returns, the destination is stored on ``self.hdfs_path``.
    """
    # Imported inside the method, as in the original implementation.
    from luigi.hdfs import client, tmppath

    source = self.output().path
    destination = tmppath()

    # NOTE(review): if the output is a local file, confirm that
    # client.copy (as opposed to an upload-style call) is intended.
    client.copy(source, destination)

    # Reached only when the copy call did not raise.
    self.hdfs_path = destination