Ejemplo n.º 1
0
 def output(self):
     """Return the S3 target for this task.

     The target is produced by ``utilFuncs.getS3Target`` from a new
     ``S3Client`` and the key returned by ``self.getKeyS3()``.
     """
     # Earlier revisions returned a luigi.LocalTarget under self.logDir, or
     # built the S3Target inline from self.bucket; both were replaced by the
     # shared helper below.
     s3_client = S3Client()
     target_key = self.getKeyS3()
     return utilFuncs.getS3Target(s3_client, target_key)
Ejemplo n.º 2
0
    def test_get_as_string_latin1(self):
        """Upload the temp file, then read it back as ISO-8859-1 text."""
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe3')

        contents = s3_client.get_as_string('s3://mybucket/putMe3', encoding='ISO-8859-1')

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(contents, self.tempFileContents.decode('ISO-8859-1'))
Ejemplo n.º 3
0
 def output(self):
     """Return the S3 target for the Wikipedia info CSV.

     The client is configured from the ``s3`` config section and the object
     path embeds the value of ``strftime("%Y-%m-%d")``.
     """
     s3_settings = config(section='s3')
     s3_client = S3Client(**s3_settings)
     date_stamp = strftime("%Y-%m-%d")
     target_path = 's3://s3-bucket-wikidata/{}/wikipedia_info_output.csv'.format(date_stamp)
     return S3Target(target_path, format=UTF8, client=s3_client)
Ejemplo n.º 4
0
    def test_get_as_string(self):
        """Upload the temp file, then read it back as a UTF-8 decoded string."""
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe2')

        contents = s3_client.get_as_string('s3://mybucket/putMe2')

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(contents, self.tempFileContents.decode('utf-8'))
Ejemplo n.º 5
0
 def test_read(self):
     """Upload the temp file and read it back through an S3Target."""
     client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
     create_bucket()
     client.put(self.tempFilePath, 's3://mybucket/tempfile')
     t = S3Target('s3://mybucket/tempfile', client=client)
     # Open via a context manager so the read handle is closed even when the
     # read itself raises; the original never closed it.
     with t.open() as read_file:
         file_str = read_file.read()
     self.assertEqual(self.tempFileContents, file_str.encode('utf-8'))
Ejemplo n.º 6
0
    def test_get_as_bytes(self):
        """Upload the temp file, then read it back unchanged via get_as_bytes."""
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe')

        contents = s3_client.get_as_bytes('s3://mybucket/putMe')

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(contents, self.tempFileContents)
Ejemplo n.º 7
0
    def test_remove_bucket_dne(self):
        """Removing a key from a bucket that does not exist raises ClientError."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        with self.assertRaises(ClientError):
            client.remove('s3://bucketdoesnotexist/file')
Ejemplo n.º 8
0
 def output(self):
     """Return the S3 target for this task.

     The target is produced by ``utilFuncs.getS3Target`` from a new
     ``S3Client`` and the key returned by ``self.getKeyS3()``.
     """
     # Earlier revisions returned a luigi.LocalTarget under self.logDir, or
     # called self.getS3Target; both were replaced by the shared helper below.
     s3_client = S3Client()
     target_key = self.getKeyS3()
     return utilFuncs.getS3Target(s3_client, target_key)
Ejemplo n.º 9
0
 def output(self):
     """Return the S3 target for this task.

     The key is derived from ``self.country``, with ``'All'`` substituted
     when the country is falsy.
     """
     # Earlier revisions returned a luigi.LocalTarget under self.logDir, or
     # built the S3Target inline from utilFuncs.BUCKET; both were replaced by
     # the shared helper below.
     country_label = self.country or 'All'
     s3_client = S3Client()
     target_key = self.getKeyS3(country_label)
     return utilFuncs.getS3Target(s3_client, target_key)
Ejemplo n.º 10
0
    def test_remove_invalid(self):
        """Removing the bucket root path raises InvalidDeleteException."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        with self.assertRaises(InvalidDeleteException):
            client.remove('s3://mybucket/')
Ejemplo n.º 11
0
    def _read_schema_file(self):
        """Return the contents of the schema file stored on S3.

        Returns:
            Whatever ``get_contents_as_string()`` yields for the key fetched
            from ``self.s3_load_path()``.

        Raises:
            Exception: if nothing exists at ``self.s3_schema_path()``.
        """
        s3Client = S3Client()
        schema_path = self.s3_schema_path()
        if not s3Client.exists(schema_path):
            # The original message interpolated a bare ``s3_schema_path`` name
            # instead of calling the method on self, so the intended error was
            # never reported with the real path.
            raise Exception("No schema file located at %s.  Can not set Redshift columns." % schema_path)

        # NOTE(review): existence is checked against s3_schema_path(), but the
        # file is logged and read from s3_load_path(). This looks unintended -
        # confirm which path holds the schema before changing it.
        load_path = self.s3_load_path()
        logger.info("Found schema file %s", load_path)

        schema_key = s3Client.get_key(load_path)
        return schema_key.get_contents_as_string()
Ejemplo n.º 12
0
    def test_get_as_string(self):
        """Upload the temp file, then fetch it back with get_as_string."""
        # Upload a fixture object first so there is something to fetch.
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        client.put(self.tempFilePath, 's3://mybucket/putMe')

        fetched = client.get_as_string('s3://mybucket/putMe')
        self.assertEqual(fetched, self.tempFileContents)
Ejemplo n.º 13
0
    def test_remove_dir(self):
        """Removing a directory also removes its ``_$folder$`` marker object."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        # Check that the marker file created by the Hadoop S3 Native
        # FileSystem is removed together with the directory.
        client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
        client.put_string("", 's3://mybucket/removemedir_$folder$')

        removed = client.remove('s3://mybucket/removemedir')
        self.assertTrue(removed)

        marker_present = client.exists('s3://mybucket/removemedir_$folder$')
        self.assertFalse(marker_present)
Ejemplo n.º 14
0
    def test_list(self):
        """Listing a prefix yields the names of the objects stored under it."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        # Create two empty objects under the same prefix.
        for object_path in ('s3://mybucket/hello/frank', 's3://mybucket/hello/world'):
            client.put_string("", object_path)

        listed = list(client.list('s3://mybucket/hello'))
        self.assertEqual(['frank', 'world'], listed)
Ejemplo n.º 15
0
class ContentData(ExternalTask):
    """External task whose output is a dataset file stored on S3."""

    # Root S3 path, as a constant.
    DATA_ROOT = 's3://advancedpythonmeenu/scifact/'
    # Filename of the dataset under the root S3 path.
    data_name = Parameter(default="arxivData.json")
    # Client created once, when the class body is executed, from the values
    # env() returns for the two AWS credential names.
    client = S3Client(
        env("AWS_ACCESS_KEY_ID"),
        env("AWS_SECRET_ACCESS_KEY"),
    )

    def output(self):
        """Return the S3Target located at ``DATA_ROOT + data_name``."""
        dataset_path = self.DATA_ROOT + self.data_name
        return S3Target(dataset_path, client=self.client)
Ejemplo n.º 16
0
    def test_remove_dir_batch(self):
        """Removing a directory holding 2000 objects deletes all of them."""
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        for i in range(2000):
            s3_client.put(self.tempFilePath,
                          's3://mybucket/removemedir/file{i}'.format(i=i))
        self.assertTrue(s3_client.remove('s3://mybucket/removemedir/'))
        # The original checked 's3://mybucket/removedir/' (typo), a prefix that
        # was never populated, so the assertion could not fail. Check the
        # directory that was actually removed.
        self.assertFalse(s3_client.exists('s3://mybucket/removemedir/'))
Ejemplo n.º 17
0
    def test_remove_dir_not_recursive(self):
        """A non-recursive remove of a populated directory is rejected."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
        with self.assertRaises(InvalidDeleteException):
            client.remove('s3://mybucket/removemedir', recursive=False)
Ejemplo n.º 18
0
    def test_list_key(self):
        """Every key object returned by listdir refers to an existing object."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        client.put_string("", 's3://mybucket/hello/frank')
        client.put_string("", 's3://mybucket/hello/world')

        # Rebuild each object's full path from the returned key and probe it.
        existence = []
        for key in client.listdir('s3://mybucket/hello', return_key=True):
            full_path = 's3://' + key.bucket_name + '/' + key.key
            existence.append(client.exists(full_path))

        self.assertEqual([True, True], existence)
Ejemplo n.º 19
0
 def test_init_without_init_or_config(self, mock):
     """With no config or arn provided, the boto3 client should be
        called with default parameters, delegating ENV or Task Role
        credential handling to boto3 itself.
     """
     client = S3Client()
     # Touch the attribute; the assertion below expects this to have invoked
     # the mock.
     client.s3
     expected_kwargs = {
         'aws_access_key_id': None,
         'aws_secret_access_key': None,
         'aws_session_token': None,
     }
     mock.assert_called_with('s3', **expected_kwargs)
Ejemplo n.º 20
0
    def run(self):
        """Download ``S3_ROOT + data`` from S3 to ``path + data`` locally."""
        remote_name = str(self.S3_ROOT + self.data)

        print("S3 filename:", remote_name)
        print("Local path:", self.path)

        access_key = env("AWS_ACCESS_KEY_ID")
        secret_key = env("AWS_SECRET_ACCESS_KEY")
        s3_client = S3Client(access_key, secret_key)

        # Per the original author's note, get() creates the file atomically.
        local_name = self.path + self.data
        s3_client.get(remote_name, local_name)
Ejemplo n.º 21
0
class SavedModel(ExternalTask):
    """External task whose output is a model archive stored on S3."""

    # Root S3 path under which the model file is looked up.
    MODEL_ROOT = 's3://advancedpythonmeenu/scifact/'
    # Filename of the model.
    model = Parameter(default="rationale_roberta_large_fever.zip")
    # Client created once, when the class body is executed, from the values
    # env() returns for the two AWS credential names.
    client = S3Client(
        env("AWS_ACCESS_KEY_ID"),
        env("AWS_SECRET_ACCESS_KEY"),
    )

    def output(self):
        """Return the S3Target located at ``MODEL_ROOT + model``."""
        model_path = self.MODEL_ROOT + self.model
        return S3Target(model_path, client=self.client)
Ejemplo n.º 22
0
    def test_list_key(self):
        """Every key object returned by list(..., return_key=True) exists."""
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')

        client.put_string("", 's3://mybucket/hello/frank')
        client.put_string("", 's3://mybucket/hello/world')

        existence = []
        for key in client.list('s3://mybucket/hello', return_key=True):
            existence.append(key.exists())

        self.assertEqual([True, True], existence)
Ejemplo n.º 23
0
    def test_exists(self):
        """Exercise S3Client against 'psetbucket' by uploading the temp file.

        No assertion is active in the visible body; the only live operation
        after client creation is the upload at the end.
        """
        # Test exists S3Client
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)

        # Disabled existence checks, kept by the original author:
        # self.assertTrue(s3_client.exists('s3://psetbucket/'))
        # self.assertTrue(s3_client.exists('s3://psetbucket'))
        # self.assertFalse(s3_client.exists('s3://psetbucket/nope'))
        # self.assertFalse(s3_client.exists('s3://psetbucket/nope/'))

        # NOTE(review): presumably create_bucket() creates 'psetbucket' -
        # confirm against its definition, which is not visible here.
        s3_client.put(self.tempFilePath, 's3://psetbucket/tempfile')
Ejemplo n.º 24
0
    def test_init_with_environment_variables(self):
        """S3Client picks up AWS credentials from environment variables.

        The environment variables and the config search paths touched by the
        test are restored afterwards, so a failure here cannot leak state into
        other tests.
        """
        env_names = ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
        saved_env = {name: os.environ.get(name) for name in env_names}
        os.environ['AWS_ACCESS_KEY_ID'] = 'foo'
        os.environ['AWS_SECRET_ACCESS_KEY'] = 'bar'
        try:
            # Don't read any existing config: point the parser at a path that
            # does not exist while the client is constructed.
            old_config_paths = configuration.LuigiConfigParser._config_paths
            configuration.LuigiConfigParser._config_paths = [tempfile.mktemp()]
            try:
                s3_client = S3Client()
            finally:
                configuration.LuigiConfigParser._config_paths = old_config_paths

            self.assertEqual(s3_client.s3.gs_access_key_id, 'foo')
            self.assertEqual(s3_client.s3.gs_secret_access_key, 'bar')
        finally:
            # Restore the environment only after the assertions, in case the
            # credentials are read when ``s3`` is accessed.
            for name, value in saved_env.items():
                if value is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = value
Ejemplo n.º 25
0
 def setUp(self, mock_config):
     """Create a local temp file fixture, an S3 client and a test bucket.

     Sets ``tempFileContents``, ``tempFilePath``, ``file_name``,
     ``local_path`` and ``s3_client`` on the test case, and makes the mocked
     config return ``AWS_ACCESS_KEY`` for any ``get`` lookup.
     """
     # Local import so this block does not depend on the file's import header.
     import os

     self.tempFileContents = "I'm a temporary file for testing\nAnd this is the second line\nThis is the third."
     # The file is opened in binary mode, so write bytes: writing the str
     # directly raises TypeError on Python 3. The context manager guarantees
     # the handle is closed even if the write fails.
     with tempfile.NamedTemporaryFile(mode='wb', delete=False) as f:
         f.write(self.tempFileContents.encode('utf-8'))
     self.tempFilePath = f.name
     # os.path.split replaces slicing on '/', which raises ValueError for
     # paths that use a different separator.
     self.local_path, self.file_name = os.path.split(f.name)
     self.s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
     bucket = self.s3_client.s3.create_bucket('bucket')
     k = Key(bucket)
     k.key = 'key/%s' % self.file_name
     mock_config.get_config.return_value.get.return_value = AWS_ACCESS_KEY
Ejemplo n.º 26
0
    def test_get(self):
        """Upload the temp file, download it again and compare the contents."""
        # put a file on s3 first
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe')

        # The context manager closes (and thereby deletes) the temp file even
        # when the assertion fails; the original leaked it in that case.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file_path = tmp_file.name

            s3_client.get('s3://mybucket/putMe', tmp_file_path)
            self.assertEqual(tmp_file.read(), self.tempFileContents)
Ejemplo n.º 27
0
    def test_get(self):
        """Upload the temp file, download it again and compare the contents."""
        create_bucket()
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe')

        # The context manager closes (and thereby deletes) the temp file even
        # when the assertion fails; the original leaked it in that case.
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            tmp_file_path = tmp_file.name

            s3_client.get('s3://mybucket/putMe', tmp_file_path)
            # Re-open by path rather than reading through the existing handle.
            with open(tmp_file_path, 'r') as f:
                content = f.read()
            # assertEqual replaces the deprecated assertEquals alias, which
            # was removed from unittest in Python 3.12.
            self.assertEqual(content, self.tempFileContents.decode("utf-8"))
Ejemplo n.º 28
0
    def test_isdir(self):
        """isdir is true for the bucket and directory-like keys, false for a plain key."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        self.assertTrue(client.isdir('s3://mybucket'))

        # Each pair is (object to upload, path that must then be a directory).
        directory_cases = (
            ('s3://mybucket/tempdir0_$folder$', 's3://mybucket/tempdir0'),
            ('s3://mybucket/tempdir1/', 's3://mybucket/tempdir1'),
        )
        for upload_path, directory_path in directory_cases:
            client.put(self.tempFilePath, upload_path)
            self.assertTrue(client.isdir(directory_path))

        client.put(self.tempFilePath, 's3://mybucket/key')
        is_directory = client.isdir('s3://mybucket/key')
        self.assertFalse(is_directory)
Ejemplo n.º 29
0
    def test_mkdir(self):
        """mkdir creates directories and refuses a missing parent when parents=False."""
        create_bucket()
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        self.assertTrue(client.isdir('s3://mybucket'))
        client.mkdir('s3://mybucket')

        client.mkdir('s3://mybucket/dir')
        self.assertTrue(client.isdir('s3://mybucket/dir'))

        with self.assertRaises(MissingParentDirectory):
            client.mkdir('s3://mybucket/dir/foo/bar', parents=False)

        nested_exists = client.isdir('s3://mybucket/dir/foo/bar')
        self.assertFalse(nested_exists)
Ejemplo n.º 30
0
    def _connect(self, write=False):
        """Build a client object for the protocol in ``self.settings``.

        Args:
            write: passed to ``self._get_credentials`` to select which
                credentials are used. It has no other effect on the client
                that is built.

        Returns:
            An S3 client for ``'s3'``, or a ``RemoteFileSystem`` for ``'ftp'``
            and ``'sftp'``.

        Raises:
            ModuleNotFoundError: protocol is ``'sftp'`` and pysftp cannot be
                imported.
            NotImplementedError: the protocol is not s3, ftp or sftp.
        """
        credentials = self._get_credentials(write)
        protocol = self.settings['protocol']

        if protocol == 's3':
            from luigi.contrib.s3 import S3Client
            from d6tpipe.luigi.s3 import S3Client as S3ClientToken
            # The original repeated this choice in identical ``write`` and
            # non-``write`` branches; only the presence of a session token
            # decides which client class is used.
            if 'aws_session_token' in credentials:
                cnxn = S3ClientToken(**credentials)
            else:
                cnxn = S3Client(**credentials)
        elif protocol == 'ftp':
            from d6tpipe.luigi.ftp import RemoteFileSystem
            cnxn = RemoteFileSystem(self.settings['location'],
                                    credentials['username'],
                                    credentials['password'])
        elif protocol == 'sftp':
            from d6tpipe.luigi.ftp import RemoteFileSystem
            try:
                import pysftp
            except ImportError as err:
                raise ModuleNotFoundError('Please install pysftp to use SFTP.') from err
            cnopts = pysftp.CnOpts()
            # NOTE(review): security - setting hostkeys to None turns off SSH
            # host key verification in pysftp. Kept as-is to preserve
            # behaviour; confirm this is acceptable for the servers involved.
            cnopts.hostkeys = None

            cnxn = RemoteFileSystem(self.settings['location'],
                                    credentials['username'],
                                    credentials['password'],
                                    sftp=True,
                                    pysftp_conn_kwargs={'cnopts': cnopts})
        else:
            # Message corrected to list sftp, which is handled above.
            raise NotImplementedError('only s3, ftp and sftp supported')

        return cnxn
Ejemplo n.º 31
0
 def test__path_to_bucket_and_key(self):
     """An s3:// path is split into its (bucket, key) pair."""
     parsed = S3Client._path_to_bucket_and_key('s3://bucket/key')
     self.assertEqual(('bucket', 'key'), parsed)
Ejemplo n.º 32
0
 def test__path_to_bucket_and_key_with_question_mark(self):
     """A '?' in the path stays part of the key."""
     parsed = S3Client._path_to_bucket_and_key('s3://bucket/key?blade')
     self.assertEqual(('bucket', 'key?blade'), parsed)