Example #1
0
    def test_tmppath_not_configured(self):
        """Check hdfs.tmppath() output for a range of target path shapes.

        Each target path is passed to hdfs.tmppath() with
        include_unix_username=False and the result is matched against the
        regex paired with it below.
        NOTE(review): the test name suggests no HDFS temporary directory is
        configured when this runs -- confirm against the test fixture.
        """
        # Given: several target paths to test, each paired with the pattern
        # its temporary path must match. Patterns are raw strings so that
        # "\d" reaches the regex engine instead of being read as an
        # (invalid) string escape sequence.
        cases = [
            ("/dir1/dir2/file",
             r"^/tmp/dir1/dir2/file-luigitemp-\d+"),
            # it would be better to see hdfs:///path instead of hdfs:/path,
            # but single slash also works well
            ("hdfs:///dir1/dir2/file",
             r"^hdfs:/tmp/dir1/dir2/file-luigitemp-\d+"),
            ("hdfs://somehost/dir1/dir2/file",
             r"^hdfs://somehost/tmp/dir1/dir2/file-luigitemp-\d+"),
            ("file:///dir1/dir2/file",
             r"^file:///tmp/dir1/dir2/file-luigitemp-\d+"),
            ("/tmp/dir/file",
             r"^/tmp/dir/file-luigitemp-\d+"),
            # known issue with duplicated "tmp" if schema is present
            ("file:///tmp/dir/file",
             r"^file:///tmp/tmp/dir/file-luigitemp-\d+"),
            # known issue with duplicated "tmp" if schema is present
            ("hdfs://somehost/tmp/dir/file",
             r"^hdfs://somehost/tmp/tmp/dir/file-luigitemp-\d+"),
            (None,
             r"^/tmp/luigitemp-\d+"),
            # this pattern is deliberately kept unanchored, as in the
            # original assertion
            ("/tmpdir/file",
             r"/tmp/tmpdir/file"),
        ]

        # When: I create a temporary path for targets
        results = [
            hdfs.tmppath(path, include_unix_username=False)
            for path, _ in cases
        ]

        # Then: I should get correct results relative to Luigi temporary
        # directory
        for result, (_, pattern) in zip(results, cases):
            self.assertRegexpMatches(result, pattern)
Example #2
0
    def test_tmppath_not_configured(self):
        """Check hdfs.tmppath() output for a range of target path shapes.

        All calls pass include_unix_username=False; each result is matched
        against the regex expected for that input.
        NOTE(review): the test name suggests no HDFS temporary directory is
        configured when this runs -- confirm against the test fixture.
        """
        # Given: several target paths to test
        path1 = "/dir1/dir2/file"
        path2 = "hdfs:///dir1/dir2/file"
        path3 = "hdfs://somehost/dir1/dir2/file"
        path4 = "file:///dir1/dir2/file"
        path5 = "/tmp/dir/file"
        path6 = "file:///tmp/dir/file"
        path7 = "hdfs://somehost/tmp/dir/file"
        path8 = None
        path9 = "/tmpdir/file"

        # When: I create a temporary path for targets
        res1 = hdfs.tmppath(path1, include_unix_username=False)
        res2 = hdfs.tmppath(path2, include_unix_username=False)
        res3 = hdfs.tmppath(path3, include_unix_username=False)
        res4 = hdfs.tmppath(path4, include_unix_username=False)
        res5 = hdfs.tmppath(path5, include_unix_username=False)
        res6 = hdfs.tmppath(path6, include_unix_username=False)
        res7 = hdfs.tmppath(path7, include_unix_username=False)
        res8 = hdfs.tmppath(path8, include_unix_username=False)
        res9 = hdfs.tmppath(path9, include_unix_username=False)

        # Then: I should get correct results relative to Luigi temporary directory
        # (patterns are raw strings so "\d" is not treated as a string escape)
        self.assertRegexpMatches(res1, r"^/tmp/dir1/dir2/file-luigitemp-\d+")
        # it would be better to see hdfs:///path instead of hdfs:/path, but single slash also works well
        self.assertRegexpMatches(res2, r"^hdfs:/tmp/dir1/dir2/file-luigitemp-\d+")
        self.assertRegexpMatches(res3, r"^hdfs://somehost/tmp/dir1/dir2/file-luigitemp-\d+")
        self.assertRegexpMatches(res4, r"^file:///tmp/dir1/dir2/file-luigitemp-\d+")
        self.assertRegexpMatches(res5, r"^/tmp/dir/file-luigitemp-\d+")
        # known issue with duplicated "tmp" if schema is present
        self.assertRegexpMatches(res6, r"^file:///tmp/tmp/dir/file-luigitemp-\d+")
        # known issue with duplicated "tmp" if schema is present
        self.assertRegexpMatches(res7, r"^hdfs://somehost/tmp/tmp/dir/file-luigitemp-\d+")
        self.assertRegexpMatches(res8, r"^/tmp/luigitemp-\d+")
        self.assertRegexpMatches(res9, r"/tmp/tmpdir/file")
Example #3
0
 def test_tmppath_username(self):
     """Check that hdfs.tmppath() adds a user-name path component.

     With include_unix_username=True the result for '/path/to/stuff' must
     match '/tmp/<name>/path/to/stuff-luigitemp-<digits>'.
     NOTE(review): <name> is matched with [a-z0-9_]+, so a user name with
     other characters (upper case, '-', '.') would not match -- confirm
     this is acceptable for the environments the test runs in.
     """
     # Raw string: "\d" must reach the regex engine rather than being read
     # as an (invalid) string escape sequence.
     self.assertRegexpMatches(hdfs.tmppath('/path/to/stuff', include_unix_username=True),
                              r"^/tmp/[a-z0-9_]+/path/to/stuff-luigitemp-\d+")
 def copy_to_hdfs(self):
     """Copy this object's output file to a path obtained from tmppath().

     The source is ``self.output().path``; the destination is whatever
     ``luigi.hdfs.tmppath()`` returns when called without arguments. The
     destination is stored on ``self.hdfs_path`` only after
     ``client.copy`` has returned.
     """
     from luigi.hdfs import client, tmppath

     source = self.output().path
     destination = tmppath()
     client.copy(source, destination)
     # Recorded last, so a failed copy leaves self.hdfs_path untouched.
     self.hdfs_path = destination