Example no. 2
    def test_put_sse(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        s3_client.put(self.tempFilePath,
                      's3://mybucket/putMe',
                      encrypt_key=True)
        self.assertTrue(s3_client.exists('s3://mybucket/putMe'))
Example no. 3
    def test_sanity_test_table_task(self, mock_config):
        mock_config.get_config.return_value.get.return_value = AWS_ACCESS_KEY
        t = TestSanityTestDynamoDBTableTask()

        # mock s3 location for writing output token
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        # create table
        table_name = 'dynamo_table1'
        schema = [HashKey('my_hash', data_type=STRING)]
        indexes = [
            AllIndex('IndexName',
                     parts=[
                         HashKey('my_hash', data_type=STRING),
                         RangeKey('range_index', data_type=NUMBER)
                     ])
        ]
        throughput = {'read': 2, 'write': 4}
        client = DynamoDBClient(aws_access_key_id=AWS_ACCESS_KEY,
                                aws_secret_access_key=AWS_SECRET_KEY)
        client.create_table(table_name, schema, throughput, indexes=indexes)

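        # the sanity-check task is expected to fail with a DynamoDBTaskException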
        self.assertRaises(DynamoDBTaskException,
                          lambda: luigi.build([t], local_scheduler=True))
Example no. 4
    def test_put_string_sse(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        s3_client.put_string("SOMESTRING",
                             's3://mybucket/putString',
                             encrypt_key=True)
        self.assertTrue(s3_client.exists('s3://mybucket/putString'))
Example no. 5
class CreateRandomData(Task):
    nrows = IntParameter(default=20)
    ncolumns = IntParameter(default=4)
    csv_string = None
    s3_client = S3Client(aws_access_key_id=ACCESS_KEY,
                         aws_secret_access_key=ACCESS_SECRET)

    def requires(self):
        return []

    def output(self):
        s3_filepath = 's3://' + BUCKET + '/random_numbers.csv'
        return S3Target(s3_filepath)

    def run(self):
        df = DataFrame(np.random.rand(self.nrows, self.ncolumns),
                       columns=ALPHABET[0:self.ncolumns])
        df.index.name = 'index'
        output = df.to_string()

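        # upload with boto directly rather than writing through the S3Target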
        conn = S3Connection(ACCESS_KEY, ACCESS_SECRET)
        bucket = conn.get_bucket(BUCKET)
        file = Key(bucket)
        file.key = 'random_numbers.csv'
        file.set_contents_from_string(output)
Example no. 6
    def test_copy_dir(self):
        """
        Test copying 20 files from one folder to another
        """

        n = 20
        copy_part_size = (1024 ** 2) * 5

        # Note we can't test the multipart copy due to moto issue #526
        # so here I have to keep the file size smaller than the copy_part_size
        file_size = 5000

        s3_dir = 's3://mybucket/copydir/'
        file_contents = b"a" * file_size
        tmp_file = tempfile.NamedTemporaryFile(mode='wb', delete=True)
        tmp_file_path = tmp_file.name
        tmp_file.write(file_contents)
        tmp_file.flush()

        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        for i in range(n):
            file_path = s3_dir + str(i)
            s3_client.put_multipart(tmp_file_path, file_path)
            self.assertTrue(s3_client.exists(file_path))

        s3_dest = 's3://mybucket/copydir_new/'
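        # copy the directory in parallel; part_size only applies to files
        # large enough to require a multipart copy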
        s3_client.copy(s3_dir, s3_dest, threads=10, part_size=copy_part_size)

        for i in range(n):
            original_size = s3_client.get_key(s3_dir + str(i)).size
            copy_size = s3_client.get_key(s3_dest + str(i)).size
            self.assertEqual(original_size, copy_size)
Example no. 7
    def create_target(self, format=None, **kwargs):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        return S3Target('s3://mybucket/test_file',
                        client=client,
                        format=format,
                        **kwargs)
Example no. 8
    def _get_s3_client(self):
        # lazily create and cache a single S3Client, reading credentials
        # from the [s3] section of the luigi configuration
        if not hasattr(self, "client"):
            self.client = S3Client(
                luigi.configuration.get_config().get('s3', 'aws_access_key_id'),
                luigi.configuration.get_config().get('s3', 'aws_secret_access_key'))
        return self.client
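For context, the helper above reads its credentials from the [s3] section of the luigi configuration file; a minimal sketch of that section, with placeholder values, might look like:

    [s3]
    aws_access_key_id=YOUR_ACCESS_KEY
    aws_secret_access_key=YOUR_SECRET_KEY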
Example no. 9
def load(client, loadDate, bucket):
    print("loading AdUnits")
    adUnitsMap = getAllAdUnits(client)
    parentedList = addParents(adUnitsMap)

    # remove any existing local files
    for auFile in glob.glob(CONFIG['local']['prefix'] + '*'):
        os.remove(auFile)

    # write data to .csv
    outFile = (os.path.dirname(CONFIG['local']['prefix']) + "/" +
               CONFIG['local']['name'] + "_" +
               loadDate.strftime(CONFIG['local']['date_format']) + "." +
               CONFIG['local']['format'])

    written = writeAsCsv(parentedList, outFile)

    # copy to S3
    s3File = bucket + CONFIG['s3']['folder'] + loadDate.strftime(
        CONFIG['s3']['date_format']) + CONFIG['s3']['file']
    # authentication data should come from the server's boto config
    s3Client = S3Client()
    s3Client.put(outFile, s3File)

    print(str(len(adUnitsMap)) + " adUnit Map size. " +
          str(len(parentedList)) + " parented List size.")
    # save to DB
    saved = saveToDb(parentedList)

    return len(parentedList)
Example no. 10
    def test_read_iterator_long(self):
        # write a file that is 5X the boto buffersize
        # to test line buffering
        old_buffer = key.Key.BufferSize
        key.Key.BufferSize = 2
        try:
            tempf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
            temppath = tempf.name
            firstline = ''.zfill(key.Key.BufferSize * 5) + os.linesep
            contents = firstline + 'line two' + os.linesep + 'line three'
            tempf.write(contents.encode('utf-8'))
            tempf.close()

            client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
            client.s3.create_bucket('mybucket')
            client.put(temppath, 's3://mybucket/largetempfile')
            t = S3Target('s3://mybucket/largetempfile', client=client)
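            # iterating the target should reassemble lines that span buffer reads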
            with t.open() as read_file:
                lines = [line for line in read_file]
        finally:
            key.Key.BufferSize = old_buffer

        self.assertEqual(3, len(lines))
        self.assertEqual(firstline, lines[0])
        self.assertEqual("line two" + os.linesep, lines[1])
        self.assertEqual("line three", lines[2])
Example no. 11
    def _run_remote_temp_upload_test(self, file_size):
        file_contents = b"a" * file_size

        tmp_file = tempfile.NamedTemporaryFile(mode='wb', delete=True)
        tmp_file_path = tmp_file.name
        tmp_file.write(file_contents)
        tmp_file.flush()

        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        s3_path = 's3://mybucket/remote_test_file'
        t = S3Target(s3_path,
                     client=s3_client,
                     remote_temp_write=True,
                     boto3_session_kwargs={
                         'region_name': 'us-east-1',
                         'aws_access_key_id': AWS_ACCESS_KEY,
                         'aws_secret_access_key': AWS_SECRET_KEY
                     })
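        # write through the target; remote_temp_write stages the data in a
        # temporary S3 location instead of a local temporary file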
        with open(tmp_file_path, 'rb') as source_file:
            with t.open('w') as write_file:
                for line in source_file:
                    write_file.write(line)

        self.assertTrue(s3_client.exists(s3_path))
        # the uploaded object must match the local file byte count
        local_size = os.path.getsize(tmp_file.name)
        key_size = s3_client.get_key(s3_path).size
        self.assertEqual(local_size, key_size)

        tmp_file.close()
Example no. 12
    def test_remove(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        self.assertRaises(
            S3ResponseError,
            lambda: s3_client.remove('s3://bucketdoesnotexist/file'))

        self.assertFalse(s3_client.remove('s3://mybucket/doesNotExist'))

        s3_client.put(self.tempFilePath, 's3://mybucket/existingFile0')
        self.assertTrue(s3_client.remove('s3://mybucket/existingFile0'))
        self.assertFalse(s3_client.exists('s3://mybucket/existingFile0'))

        self.assertRaises(InvalidDeleteException,
                          lambda: s3_client.remove('s3://mybucket/'))

        self.assertRaises(InvalidDeleteException,
                          lambda: s3_client.remove('s3://mybucket'))

        s3_client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
        self.assertRaises(
            InvalidDeleteException,
            lambda: s3_client.remove('s3://mybucket/removemedir',
                                     recursive=False))

        # test that the marker file created by Hadoop S3 Native FileSystem is removed
        s3_client.put(self.tempFilePath, 's3://mybucket/removemedir/file')
        s3_client.put_string("", 's3://mybucket/removemedir_$folder$')
        self.assertTrue(s3_client.remove('s3://mybucket/removemedir'))
        self.assertFalse(
            s3_client.exists('s3://mybucket/removemedir_$folder$'))
Example no. 13
    def test_read(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        client.put(self.tempFilePath, 's3://mybucket/tempfile')
        t = S3Target('s3://mybucket/tempfile', client=client)
        read_file = t.open()
        file_str = read_file.read()
        self.assertEqual(self.tempFileContents, file_str)
Example no. 14
    def test_del(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/test_del', client=client)
        p = t.open('w')
        print('test', file=p)
        del p
        self.assertFalse(t.exists())
Example no. 15
    def test_close(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/test_file', client=client)
        p = t.open('w')
        print('test', file=p)
        self.assertFalse(t.exists())
        p.close()
        self.assertTrue(t.exists())
Example no. 16
    def run(self):
        """
        Generate and print a URL where we can download the graph.
        """
        s3_client = S3Client()
        s3_key = s3_client.get_key(self.s3_path)
        download_url = s3_key.generate_url(expires_in=self.url_expires_in)

        logger.info('DOWNLOAD GRAPH AT: %s' % download_url)
Example no. 17
    def test_list_key(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        s3_client.put_string("", 's3://mybucket/hello/frank')
        s3_client.put_string("", 's3://mybucket/hello/world')

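        # with return_key=True, list() yields key objects, so each result
        # can be checked with exists()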
        self.assertEqual([True, True],
                         [x.exists() for x in s3_client.list('s3://mybucket/hello', return_key=True)])
Example no. 18
    def test_list(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')

        s3_client.put_string("", 's3://mybucket/hello/frank')
        s3_client.put_string("", 's3://mybucket/hello/world')

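        # list() yields key names relative to the given prefix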
        self.assertEqual(['frank', 'world'],
                         list(s3_client.list('s3://mybucket/hello')))
Example no. 19
    def test_get_as_string(self):
        # put a file on s3 first
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe')

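        # the downloaded string should match the uploaded file contents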
        contents = s3_client.get_as_string('s3://mybucket/putMe')

        self.assertEqual(contents, self.tempFileContents)
Example no. 20
    def _read_schema_file(self):
        s3Client = S3Client()
        if not s3Client.exists(self.s3_schema_path()):
            raise Exception("No schema file located at %s. Cannot set Redshift columns."
                            % self.s3_schema_path())
        else:
            logger.info("Found schema file %s" % self.s3_schema_path())

        schema_key = s3Client.get_key(self.s3_schema_path())
        return schema_key.get_contents_as_string()
Example no. 21
    def test_write_cleanup_with_error(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/test_cleanup2', client=client)
        try:
            with t.open('w'):
                raise Exception('something broke')
        except Exception:
            pass
        self.assertFalse(t.exists())
Example no. 22
    def test_gzip(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/gzip_test', luigi.format.Gzip,
                     client=client)
        p = t.open('w')
        test_data = 'test'
        p.write(test_data)
        self.assertFalse(t.exists())
        p.close()
        self.assertTrue(t.exists())
Example no. 23
    def test_write_cleanup_no_close(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/test_cleanup', client=client)

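        # open a writer and drop it without close(); garbage collection
        # should abort the write, leaving nothing on S3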
        def context():
            f = t.open('w')
            f.write('stuff')

        context()
        gc.collect()
        self.assertFalse(t.exists())
Example no. 24
    def test_init_with_environment_variables(self):
        os.environ['AWS_ACCESS_KEY_ID'] = 'foo'
        os.environ['AWS_SECRET_ACCESS_KEY'] = 'bar'
        # Don't read any existing config
        old_config_paths = configuration.LuigiConfigParser._config_paths
        configuration.LuigiConfigParser._config_paths = [tempfile.mktemp()]

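        # with no config file present, credentials must come from the environment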
        s3_client = S3Client()
        configuration.LuigiConfigParser._config_paths = old_config_paths

        self.assertEqual(s3_client.s3.aws_access_key_id, 'foo')
        self.assertEqual(s3_client.s3.aws_secret_access_key, 'bar')
Example no. 25
    def test_mkdir(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        self.assertTrue(s3_client.isdir('s3://mybucket'))
        s3_client.mkdir('s3://mybucket')

        s3_client.mkdir('s3://mybucket/dir')
        self.assertTrue(s3_client.isdir('s3://mybucket/dir'))

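        # creating a nested directory with parents=False should fail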
        self.assertRaises(MissingParentDirectory,
                          s3_client.mkdir, 's3://mybucket/dir/foo/bar', parents=False)
        self.assertFalse(s3_client.isdir('s3://mybucket/dir/foo/bar'))
Example no. 26
def convert(lines, configuration):
    access_key_id = str(configuration['aws_access_key_id'])
    secret_access_key = str(configuration['aws_secret_access_key'])
    bucket = str(configuration['bucket'])
    cfg_filename = str(configuration.get('output_file', ''))

    s3_client = S3Client(access_key_id, secret_access_key)

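    # one S3Target and open file handle is kept per distinct output path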
    targets = {}

    for line in lines:
        try:
            data = json.loads(line)
        except Exception as e:
            raise Exception(errors.PARSING_ERROR % (line, e))

        if 'type' not in data:
            raise Exception(errors.MISSING_KEY_ERROR % ('type', line))

        data_type = data['type']

        if data_type == 'RECORD':
            if 'stream' not in data:
                raise Exception(errors.MISSING_KEY_ERROR % ('stream', line))

            filename = cfg_filename
            if filename == "":
                filename = data['stream'] + '.json'

            target_path = ('s3://{bucket}/{filename}'.format(
                bucket=bucket, filename=filename))

            record = data['record']
            print(targets)

            if target_path not in targets:
                target = S3Target(target_path, client=s3_client)
                targets[target_path] = {
                    'target': target,
                    'file': target.open('w')
                }

            target = targets[target_path]['target']

            targets[target_path]['file'].write(json.dumps(record) + '\n')

        else:
            l.WARN(errors.UNEXPECTED_MESSAGE_TYPE % (data['type'], data))

    for target_path in targets:
        targets[target_path]['file'].close()
Example no. 27
    def test_is_dir(self):
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        self.assertTrue(s3_client.is_dir('s3://mybucket'))

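        # a Hadoop-style "_$folder$" marker key marks the prefix as a directory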
        s3_client.put(self.tempFilePath, 's3://mybucket/tempdir0_$folder$')
        self.assertTrue(s3_client.is_dir('s3://mybucket/tempdir0'))

        s3_client.put(self.tempFilePath, 's3://mybucket/tempdir1/')
        self.assertTrue(s3_client.is_dir('s3://mybucket/tempdir1'))

        s3_client.put(self.tempFilePath, 's3://mybucket/key')
        self.assertFalse(s3_client.is_dir('s3://mybucket/key'))
Example no. 28
    def test_get(self):
        # put a file on s3 first
        s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        s3_client.s3.create_bucket('mybucket')
        s3_client.put(self.tempFilePath, 's3://mybucket/putMe')

        tmp_file = tempfile.NamedTemporaryFile(delete=True)
        tmp_file_path = tmp_file.name

        s3_client.get('s3://mybucket/putMe', tmp_file_path)
        self.assertEqual(tmp_file.read(), self.tempFileContents)

        tmp_file.close()
Example no. 29
    def test_gzip_works_and_cleans_up(self):
        client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        client.s3.create_bucket('mybucket')
        t = S3Target('s3://mybucket/gzip_test', luigi.format.Gzip,
                     client=client)
        test_data = b'123testing'
        with t.open('w') as f:
            f.write(test_data)

        with t.open() as f:
            result = f.read()

        self.assertEqual(test_data, result)
Example no. 30
    def setUp(self, mock_config):
        f = tempfile.NamedTemporaryFile(mode='wb', delete=False)
        self.tempFileContents = "I'm a temporary file for testing\nAnd this is the second line\nThis is the third."
        f.write(self.tempFileContents.encode('utf-8'))
        f.close()
        self.tempFilePath = f.name
        self.file_name = f.name[f.name.rindex('/') + 1:]
        self.local_path = f.name[:f.name.rindex('/')]
        self.s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
        bucket = self.s3_client.s3.create_bucket('bucket')
        k = Key(bucket)
        k.key = 'key/%s' % self.file_name
        mock_config.get_config.return_value.get.return_value = AWS_ACCESS_KEY
Example no. 31
    def is_empty(self):
        # resolve the object's size with boto3; zero bytes means empty
        s3 = boto3.resource('s3')
        (bucket, key) = S3Client._path_to_bucket_and_key(self.path)
        return s3.ObjectSummary(bucket, key).size == 0