Ejemplo n.º 1
0
class FileSystemTest(unittest.TestCase):
    """Exercise ``LocalFileSystem`` against a scratch directory on disk."""

    # Scratch directory; removed before and after every test.
    path = '/tmp/luigi-test-dir'
    # Filesystem object under test, shared by all test methods.
    fs = LocalFileSystem()

    def setUp(self):
        """Remove the scratch directory so each test starts without it."""
        if os.path.exists(self.path):
            shutil.rmtree(self.path)

    def tearDown(self):
        """Remove whatever the test left behind (same cleanup as setUp)."""
        self.setUp()

    def test_mkdir(self):
        """Check mkdir with missing parents, default arguments and an existing dir."""
        testpath = os.path.join(self.path, 'foo/bar')

        # The scratch directory does not exist yet, so the parent is missing.
        self.assertRaises(MissingParentDirectory, self.fs.mkdir, testpath, parents=False)

        self.fs.mkdir(testpath)
        self.assertTrue(os.path.exists(testpath))
        self.assertTrue(self.fs.isdir(testpath))

        self.assertRaises(FileAlreadyExists, self.fs.mkdir, testpath, raise_if_exists=True)

    def test_exists(self):
        """Check exists/isdir before and after the directory is created."""
        self.assertFalse(self.fs.exists(self.path))
        os.mkdir(self.path)
        self.assertTrue(self.fs.exists(self.path))
        self.assertTrue(self.fs.isdir(self.path))

    def test_listdir(self):
        """Check that listdir reports the single file inside the directory."""
        os.mkdir(self.path)
        with open(self.path + '/file', 'w'):
            pass
        # assertEqual, not assertTrue: assertTrue(a, b) treats ``b`` as the
        # failure message and passes for any non-empty ``a``.
        self.assertEqual([self.path + '/file'], list(self.fs.listdir(self.path + '/')))
Ejemplo n.º 2
0
class VigraRagTarget(FileSystemTarget):
    """
    Target whose payload is written with ``rag.writeHDF5`` and read back with
    ``vigra.graphs.loadGridRagHDF5``, both under the HDF5 key ``'data'``.
    """
    # Filesystem object used by the target.
    fs = LocalFileSystem()

    def makedirs(self):
        """
        Create all parent folders if they do not exist.

        Raises:
            OSError: if the parent folder could not be created and is still
                not a directory afterwards (e.g. permission denied, or a
                regular file is in the way).
        """
        normpath = os.path.normpath(self.path)
        parentfolder = os.path.dirname(normpath)
        if not parentfolder:
            # Path has no directory component; nothing to create.
            return
        try:
            os.makedirs(parentfolder)
        except OSError:
            # An already existing folder is fine; any other failure would
            # otherwise only surface later as a confusing write error.
            if not os.path.isdir(parentfolder):
                raise

    def __init__(self, path):
        """Store ``path`` via the base-class constructor."""
        super(VigraRagTarget, self).__init__(path)

    def open(self, mode='r'):
        """Stream access is not supported; always raises AttributeError."""
        raise AttributeError("Not implemented")

    def write(self, rag):
        """Create the parent folders, then write ``rag`` to ``self.path``."""
        self.makedirs()
        rag.writeHDF5(self.path, 'data')

    def read(self):
        """Return the object loaded from ``self.path`` by vigra."""
        return vigra.graphs.loadGridRagHDF5(self.path, 'data')
Ejemplo n.º 3
0
class FileSystemTest(unittest.TestCase):
    """Exercise ``LocalFileSystem`` against a scratch directory on disk."""

    # Scratch directory; removed before and after every test.
    path = '/tmp/luigi-test-dir'
    # Filesystem object under test, shared by all test methods.
    fs = LocalFileSystem()

    def setUp(self):
        """Remove the scratch directory so each test starts without it."""
        if os.path.exists(self.path):
            shutil.rmtree(self.path)

    def tearDown(self):
        """Remove whatever the test left behind (same cleanup as setUp)."""
        self.setUp()

    def test_copy(self):
        """Check that copy creates the destination and keeps the source."""
        src = os.path.join(self.path, 'src.txt')
        dest = os.path.join(self.path, 'newdir', 'dest.txt')

        # Create an empty source file through a LocalTarget.
        LocalTarget(src).open('w').close()
        self.fs.copy(src, dest)
        self.assertTrue(os.path.exists(src))
        self.assertTrue(os.path.exists(dest))

    def test_mkdir(self):
        """Check mkdir with missing parents, default arguments and an existing dir."""
        testpath = os.path.join(self.path, 'foo/bar')

        # The scratch directory does not exist yet, so the parent is missing.
        self.assertRaises(MissingParentDirectory,
                          self.fs.mkdir,
                          testpath,
                          parents=False)

        self.fs.mkdir(testpath)
        self.assertTrue(os.path.exists(testpath))
        self.assertTrue(self.fs.isdir(testpath))

        self.assertRaises(FileAlreadyExists,
                          self.fs.mkdir,
                          testpath,
                          raise_if_exists=True)

    def test_exists(self):
        """Check exists/isdir before and after the directory is created."""
        self.assertFalse(self.fs.exists(self.path))
        os.mkdir(self.path)
        self.assertTrue(self.fs.exists(self.path))
        self.assertTrue(self.fs.isdir(self.path))

    def test_listdir(self):
        """Check that listdir reports the single file inside the directory."""
        os.mkdir(self.path)
        with open(self.path + '/file', 'w'):
            pass
        # assertEqual, not assertTrue: assertTrue(a, b) treats ``b`` as the
        # failure message and passes for any non-empty ``a``.
        self.assertEqual([self.path + '/file'],
                         list(self.fs.listdir(self.path + '/')))

    def test_move_to_new_dir(self):
        """Check that move places the file in a not-yet-existing directory."""
        # Regression test for a bug in LocalFileSystem.move
        src = os.path.join(self.path, 'src.txt')
        dest = os.path.join(self.path, 'newdir', 'dest.txt')

        # Create an empty source file through a LocalTarget.
        LocalTarget(src).open('w').close()
        self.fs.move(src, dest)
        self.assertTrue(os.path.exists(dest))
Ejemplo n.º 4
0
class TestFileSystem(unittest.TestCase):
    """Minimal checks of ``LocalFileSystem`` using a scratch directory."""

    # Scratch directory; wiped around every test.
    path = '/tmp/luigi-test-dir'
    # Filesystem object under test.
    fs = LocalFileSystem()

    def setUp(self):
        """Delete the scratch directory if a previous run left it behind."""
        if not os.path.exists(self.path):
            return
        shutil.rmtree(self.path)

    def tearDown(self):
        """Reuse the setUp cleanup to remove the scratch directory."""
        self.setUp()

    def test_mkdir(self):
        """mkdir on a nested path must leave that path on disk."""
        nested_dir = os.path.join(self.path, 'foo/bar')
        self.fs.mkdir(nested_dir)
        created = os.path.exists(nested_dir)
        self.assertTrue(created)

    def test_exists(self):
        """exists must be false before os.mkdir and true afterwards."""
        present_before = self.fs.exists(self.path)
        self.assertFalse(present_before)
        os.mkdir(self.path)
        present_after = self.fs.exists(self.path)
        self.assertTrue(present_after)
Ejemplo n.º 5
0
class BaseTarget(FileSystemTarget):
    """
    Custom target base class
    """
    # Filesystem object used by the target.
    fs = LocalFileSystem()

    def __init__(self, path):
        """Store ``path`` via the base-class constructor."""
        super(BaseTarget, self).__init__(path)

    def makedirs(self):
        """
        Create all parent folders if they do not exist.

        Raises:
            OSError: if the parent folder could not be created and is still
                not a directory afterwards (e.g. permission denied, or a
                regular file is in the way).
        """
        normpath = os.path.normpath(self.path)
        parentfolder = os.path.dirname(normpath)
        if not parentfolder:
            # Path has no directory component; nothing to create.
            return
        try:
            os.makedirs(parentfolder)
        except OSError:
            # An already existing folder is fine; any other failure would
            # otherwise only surface later as a confusing write error.
            if not os.path.isdir(parentfolder):
                raise
Ejemplo n.º 6
0
    def _make_target_classes(self):
        '''Create client and target objects for storage service.

        Reads ``self.storage_service`` ('local', 's3' or 'gcs') and, for the
        remote services, credentials from ``self.config``. On success sets:

        * ``self.client`` -- the filesystem/client object,
        * ``self.target_class`` -- target subclass constructed from a path only,
        * ``self.flag_target_class`` -- flag-target subclass constructed from
          a path only.

        Raises:
            EnvironmentError: if ``self.storage_service`` is not one of the
                values handled here.
        '''

        if self.storage_service == 'local':
            client = LocalFileSystem()
            target = LocalTarget
            flag_target = LocalTarget  # just use a normal target

        elif self.storage_service == 's3':
            client = S3Client(
                aws_access_key_id=self.config['aws_access_key_id'],
                aws_secret_access_key=self.config['aws_secret_access_key'])
            target = S3Target
            flag_target = S3FlagTarget

        elif self.storage_service == 'gcs':

            # Use GCP Service Account private key credentials to
            # authenticate with the GCS API. Service Accounts
            # are unique to the GCP project.
            key_json = json.loads(
                    self.config['gcs_service_account_private_key_json'])
            cred = ServiceAccountCredentials.from_json_keyfile_dict(key_json)
            client = GCSClient(oauth_credentials=cred)

            # Hack around a bug in Luigi's GCS module
            class GCSFlagTarget2(GCSTarget):
                fs = None

                def __init__(self, path, format=None, client=None, flag='_SUCCESS'):
                    if format is None:
                        format = luigi.format.get_default_format()
                    # endswith() also rejects an empty path with the intended
                    # ValueError instead of an IndexError from path[-1].
                    if not path.endswith("/"):
                        raise ValueError("GCSFlagTarget requires the path to be to a "
                                         "directory.  It must end with a slash ( / ).")
                    # This is the only line that's different
                    super(GCSFlagTarget2, self).__init__(path, client=client)
                    self.format = format
                    self.fs = client or GCSClient()
                    self.flag = flag

                def exists(self):
                    # The target exists when the flag file inside the
                    # directory exists.
                    flag_path = self.path + self.flag
                    return self.fs.exists(flag_path)

            target = GCSTarget
            flag_target = GCSFlagTarget2
        else:
            raise EnvironmentError('Please add known file_storage value to config.')

        # Targets will be initialized many times in luigi tasks.
        # Subclass the chosen Target and add the Client to it, so
        # you don't have to pass the Client to the DAG.

        init_kwargs = {
            'format': MixedUnicodeBytesFormat()
        }
        # The local target classes are constructed without a client argument.
        if self.storage_service != 'local':
            init_kwargs['client'] = client

        class TargetWithClient(target):
            def __init__(self, path):
                super(TargetWithClient, self).__init__(path, **init_kwargs)

        class FlagTargetWithClient(flag_target):
            def __init__(self, path):
                super(FlagTargetWithClient, self).__init__(path, **init_kwargs)

        self.client = client
        self.target_class = TargetWithClient
        self.flag_target_class = FlagTargetWithClient