Example 1
def upload_new_files_to_bucket(glob_filename,
                               basename='',
                               client_obj=None,
                               bucket_prefix=None,
                               bucket_suffix=None):

    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return

    bucket_name = lookup_bucket(client_obj,
                                prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return

    bucket = client_obj.get_bucket(bucket_name)

    blobs = bucket.list_blobs()
    blob_names = [b.name for b in blobs]
    for filepath in glob(path.join(basename, glob_filename)):
        filename = path.split(filepath)[-1]
        if filename not in blob_names:
            blob = storage.Blob(filename, bucket)
            with open(filepath, 'rb') as fp:
                blob.upload_from_file(fp)

    return client_obj
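
A minimal usage sketch for the helper above, assuming it lives in the same module as maybe_populate_client_obj and lookup_bucket (neither is shown here); the local directory, glob pattern, and bucket prefix are placeholders, not part of the original example.

# Hypothetical call: uploads every local CSV under exports/ that is not
# already present in the first bucket matching the given prefix.
client = upload_new_files_to_bucket('*.csv',
                                    basename='exports',
                                    bucket_prefix='my-project-')
if client is None:
    print('No client available or no matching bucket; nothing was uploaded.')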
Example 2
def test_no_retry_on_keyboardinterrupt(b, collect):
    """Ensure that KeyboardInterrupts are forwarded."""
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    # If vanilla KeyboardInterrupt is used, then sending SIGINT to the
    # test can cause it to pass improperly, so use a subtype instead.
    class MarkedKeyboardInterrupt(KeyboardInterrupt):
        pass

    collect.inject(MarkedKeyboardInterrupt('SIGINT, probably'))
    d = gs_deleter.Deleter()

    with pytest.raises(MarkedKeyboardInterrupt):
        d.delete(k)

        # Exactly when coroutines are scheduled is non-deterministic,
        # so spin while yielding to provoke the
        # MarkedKeyboardInterrupt being processed within the
        # pytest.raises context manager.
        while True:
            gevent.sleep(0.1)

    # Only one blob should have been aborted, since the purpose is to
    # *not* retry when processing KeyboardInterrupt.
    assert collect.aborted_blobs == [blob_name]

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # Since there is no retrying, no blobs should be deleted.
    assert not collect.deleted_blobs
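
The deleter tests in this listing rely on `b` (a bucket test double) and a `collect` fixture that are not shown here. The sketch below is only one plausible shape for such a fault-injecting collector; the class name, the monkeypatch wiring, and the assumption that the deleter ultimately calls bucket.delete_blob(blob.name) are guesses, not the original fixtures.

import pytest


class DeleteCollector(object):
    """Records blob deletions and optionally raises an injected exception."""

    def __init__(self):
        self.deleted_blobs = []
        self.aborted_blobs = []
        self._injected_exc = None

    def inject(self, exc):
        self._injected_exc = exc

    def delete_blob(self, blob_name):
        if self._injected_exc is not None:
            self.aborted_blobs.append(blob_name)
            raise self._injected_exc
        self.deleted_blobs.append(blob_name)


@pytest.fixture
def collect(monkeypatch, b):
    # Route the bucket double's delete_blob through the collector (sketch only).
    c = DeleteCollector()
    monkeypatch.setattr(b, 'delete_blob', c.delete_blob)
    return c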
Example 3
    def test_direct_write_and_read_into_file(self):
        blob = storage.Blob(bucket=self.bucket, name='MyBuffer')
        file_contents = b'Hello World'
        blob.upload_from_string(file_contents)
        self.case_blobs_to_delete.append(blob)

        same_blob = storage.Blob(bucket=self.bucket, name='MyBuffer')
        same_blob.reload()  # Initialize properties.
        temp_filename = tempfile.mktemp()
        with open(temp_filename, 'wb') as file_obj:
            same_blob.download_to_file(file_obj)

        with open(temp_filename, 'rb') as file_obj:
            stored_contents = file_obj.read()

        self.assertEqual(file_contents, stored_contents)
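
tempfile.mktemp() above is deprecated because the returned name can race with another process. A sketch of the same download-and-compare steps as they might look inside the test method, using a managed temporary file instead; same_blob and file_contents come from the code above, and the os module is assumed to be imported.

        # Download into a NamedTemporaryFile, then read the bytes back and
        # remove the file when done.
        with tempfile.NamedTemporaryFile(delete=False) as file_obj:
            temp_filename = file_obj.name
            same_blob.download_to_file(file_obj)

        with open(temp_filename, 'rb') as file_obj:
            stored_contents = file_obj.read()
        os.remove(temp_filename)

        self.assertEqual(file_contents, stored_contents)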
Example 4
def _upload_single(fpath):
    """
    Uploads single file to GCS. Returns a tuple containing
    (upload_success, fpath).
    """
    success = True

    try:
        client = _get_client('storage')
    except CouldNotObtainCredentialsError as e:
        logging.error('Could not obtain GCS credentials: {0}'.format(e))
        return False, fpath

    bucket = client.bucket(config.GCS_BUCKET)
    blob = storage.Blob(os.path.basename(fpath), bucket)

    try:
        blob.upload_from_filename(fpath)
        msg = 'Successfully uploaded {0} to GCS'
        logging.info(msg.format(fpath))

    except (GCloudError, GCloudStreamingError) as e:
        msg = '{0} failed to upload to GCS: {1}'
        logging.error(msg.format(fpath, e))
        success = False

    return success, fpath
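
A hypothetical caller for _upload_single, shown only as a sketch: the file paths are placeholders, and the bookkeeping simply mirrors the (upload_success, fpath) tuple the helper returns.

import logging

# Placeholder paths; in practice these would come from a directory scan.
local_files = ['/tmp/report-1.csv', '/tmp/report-2.csv']

failed = []
for fpath in local_files:
    success, fpath = _upload_single(fpath)
    if not success:
        failed.append(fpath)

if failed:
    logging.warning('Uploads failed for: %s', ', '.join(failed))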
Example 5
def update_bucket_status(timestamps,
                         basename='',
                         client_obj=None,
                         bucket_prefix=None,
                         bucket_suffix=None,
                         folder_prefixes=0):

    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return

    bucket_name = lookup_bucket(client_obj,
                                prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return

    bucket = client_obj.get_bucket(bucket_name)

    blobs = bucket.list_blobs()
    blobs = [b for b in blobs]
    blob_filenames = [
        path.join(*([basename] + b.name.split('_')[:folder_prefixes] +
                    ['_'.join(b.name.split('_')[folder_prefixes:])]))
        for b in blobs
    ]
    blobs = dict(zip(blob_filenames, blobs))

    for key, timestamp in timestamps.items():
        key_parts = key.split('_')
        n_key_parts = len(key_parts)
        subpath = path.join(
            path.join(*key_parts[:folder_prefixes]) if folder_prefixes != 0
            else '', '_'.join(key_parts[folder_prefixes:])) + '*'
        for f in glob(
                path.join(basename, subpath,
                          *(['*'] * (folder_prefixes - n_key_parts)))):
            meta = blobs[f].metadata if f in blobs else None
            if meta is not None and 'timestamp' in meta:
                meta = json.loads(meta['timestamp'],
                                  object_hook=cse_json_decoding_hook)
            else:
                meta = None
            if meta is None or meta < timestamp:
                blob = storage.Blob(
                    '_'.join(metasplit(f, folder_prefixes)[1:]), bucket)
                with open(f, 'rb') as fp:
                    blob.upload_from_file(fp)
                blob.metadata = dict(
                    timestamp=json.dumps(timestamp, cls=CSEJSONEncoder))
                blob.patch()

    for f, blob in blobs.items():
        if not path.exists(f):
            try:
                blob.delete()
            except NotFound:
                # The blob is already gone; nothing to do.
                pass

    return client_obj
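
A hedged usage sketch for update_bucket_status: the directory layout, bucket prefix, and timestamp values are illustrative only. With folder_prefixes=1, a key such as '2020_results.csv' is expected to correspond to the local file data/2020/results.csv and to a blob named 2020_results.csv.

from datetime import datetime

# Placeholder timestamps; the values only need to be comparable with whatever
# cse_json_decoding_hook produces for the stored blob metadata.
timestamps = {
    '2020_results.csv': datetime(2024, 1, 1),
    '2021_results.csv': datetime(2024, 1, 2),
}

client = update_bucket_status(timestamps,
                              basename='data',      # local root holding 2020/, 2021/
                              bucket_prefix='cse-',  # forwarded to lookup_bucket
                              folder_prefixes=1)     # first key part maps to a folder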
Example 6
    def test_create_signed_read_url(self):
        blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
        expiration = int(time.time() + 5)
        signed_url = blob.generate_signed_url(expiration, method='GET')

        response, content = HTTP.request(signed_url, method='GET')
        self.assertEqual(response.status, 200)
        self.assertEqual(content, self.LOCAL_FILE)
Example 7
def _uri_to_blob(creds, uri, conn=None):
    assert uri.startswith('gs://')
    url_tup = urlparse(uri)
    bucket_name = url_tup.netloc
    if conn is None:
        conn = calling_format.connect(creds)
    b = storage.Bucket(conn, name=bucket_name)
    return storage.Blob(url_tup.path, b)
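
A usage sketch for _uri_to_blob; creds is whatever credential object calling_format.connect expects, and the URI is a placeholder. Note that urlparse leaves a leading slash on the path, so the resulting blob name keeps it.

# Sketch only: resolve a gs:// URI without an existing connection.
blob = _uri_to_blob(creds, 'gs://example-bucket/backups/segment-0001')
print(blob.bucket.name)   # example-bucket
print(blob.name)          # /backups/segment-0001 (leading slash from urlparse)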
Example 8
    def setUp(self):
        super(TestStorageSignURLs, self).setUp()

        logo_path = self.FILES['logo']['path']
        with open(logo_path, 'rb') as file_obj:
            self.LOCAL_FILE = file_obj.read()

        blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
        blob.upload_from_string(self.LOCAL_FILE)
        self.case_blobs_to_delete.append(blob)
Example 9
def test_processes_one_deletion(b, collect):
    # Mock up a blob and bucket
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    d = gs_deleter.Deleter()
    d.delete(k)
    d.close()

    assert collect.deleted_blobs == [blob_name]
Example 10
    def test_copy_existing_file(self):
        filename = self.FILES['logo']['path']
        blob = storage.Blob('CloudLogo', bucket=self.bucket)
        blob.upload_from_filename(filename)
        self.case_blobs_to_delete.append(blob)

        new_blob = self.bucket.copy_blob(blob, self.bucket, 'CloudLogoCopy')
        self.case_blobs_to_delete.append(new_blob)

        base_contents = blob.download_as_string()
        copied_contents = new_blob.download_as_string()
        self.assertEqual(base_contents, copied_contents)
Example 11
    def test_small_file_write_from_filename(self):
        blob = storage.Blob(bucket=self.bucket, name='SmallFile')
        self.assertEqual(blob._properties, {})

        file_data = self.FILES['simple']
        blob.upload_from_filename(file_data['path'])
        self.case_blobs_to_delete.append(blob)

        md5_hash = blob.md5_hash
        if not isinstance(md5_hash, six.binary_type):
            md5_hash = md5_hash.encode('utf-8')
        self.assertEqual(md5_hash, file_data['hash'])
Example 12
    def test_create_signed_delete_url(self):
        blob = storage.Blob(bucket=self.bucket, name='LogoToSign.jpg')
        expiration = int(time.time() + 283473274)
        signed_delete_url = blob.generate_signed_url(expiration,
                                                     method='DELETE')

        response, content = HTTP.request(signed_delete_url, method='DELETE')
        self.assertEqual(response.status, 204)
        self.assertEqual(content, b'')

        # Check that the blob has actually been deleted.
        self.assertFalse(blob.name in self.bucket)
Example 13
    def test_write_metadata(self):
        filename = self.FILES['logo']['path']
        blob_name = os.path.basename(filename)

        blob = storage.Blob(blob_name, bucket=self.bucket)
        blob.upload_from_filename(filename)
        self.case_blobs_to_delete.append(blob)

        # NOTE: This should not be necessary. We should be able to pass
        #       it in to upload_file and also to upload_from_string.
        blob.content_type = 'image/png'
        self.assertEqual(blob.content_type, 'image/png')
Example 14
    def setUpClass(cls):
        super(TestStoragePseudoHierarchy, cls).setUpClass()
        # Make sure bucket empty before beginning.
        for blob in cls.bucket.list_blobs():
            blob.delete()

        simple_path = cls.FILES['simple']['path']
        blob = storage.Blob(cls.FILENAMES[0], bucket=cls.bucket)
        blob.upload_from_filename(simple_path)
        cls.suite_blobs_to_delete = [blob]
        for filename in cls.FILENAMES[1:]:
            new_blob = cls.bucket.copy_blob(blob, cls.bucket, filename)
            cls.suite_blobs_to_delete.append(new_blob)
Example 15
    def test_large_file_write_from_stream(self):
        blob = storage.Blob(bucket=self.bucket, name='LargeFile')
        self.assertEqual(blob._properties, {})

        file_data = self.FILES['big']
        with open(file_data['path'], 'rb') as file_obj:
            blob.upload_from_file(file_obj)
            self.case_blobs_to_delete.append(blob)

        md5_hash = blob.md5_hash
        if not isinstance(md5_hash, six.binary_type):
            md5_hash = md5_hash.encode('utf-8')
        self.assertEqual(md5_hash, file_data['hash'])
Example 16
def upload_blob():
    """Uploads a file to the bucket."""
    storage_client = storage.Client.from_service_account_json(
        os.path.join(os.getcwd(), 'crypto-trading-c8a8078ea295.json'))
    bucket = storage_client.get_bucket('stock-twits')
    now = datetime.datetime.now()
    blob_name = '{0}{1}{2}_twits.csv'.format(now.year, now.month, now.day)
    blob = bucket.blob(blob_name)
    blob.upload_from_filename(os.path.join(os.getcwd(), 'twits/output.csv'))
    if storage.Blob(bucket=bucket, name=blob_name).exists(storage_client):
        os.remove(os.path.join(os.getcwd(), 'twits/output.csv'))

        print('File {} uploaded to {}.'.format(blob_name,
                                               'stock-twits bucket'))
Example 17
    def setUpClass(cls):
        super(TestStorageListFiles, cls).setUpClass()
        # Make sure bucket empty before beginning.
        for blob in cls.bucket.list_blobs():
            blob.delete()

        logo_path = cls.FILES['logo']['path']
        blob = storage.Blob(cls.FILENAMES[0], bucket=cls.bucket)
        blob.upload_from_filename(logo_path)
        cls.suite_blobs_to_delete = [blob]

        # Copy main blob onto remaining in FILENAMES.
        for filename in cls.FILENAMES[1:]:
            new_blob = cls.bucket.copy_blob(blob, cls.bucket, filename)
            cls.suite_blobs_to_delete.append(new_blob)
Example 18
def test_processes_many_deletions(b, collect):
    # Generate a target list of blobs in a stable order
    target = sorted(['test-blob-' + str(x) for x in range(20001)])

    # Construct blobs from the generated names and delete them all.
    blobs = [storage.Blob(blob_name, b) for blob_name in target]
    d = gs_deleter.Deleter()

    for k in blobs:
        d.delete(k)

    d.close()

    # Sort the deleted blob names to obtain another stable order and
    # then ensure that everything was passed for deletion
    # successfully.
    assert sorted(collect.deleted_blobs) == target
Example 19
    def _upload(self, task):
        """Upload the file to the google storage

        Parameters:
        :param task:  The task which define which folder (task.directory) should be uploaded
                      to which storage (task.id)
        """
        # We can not use the self.gcs cause we use it in a thread and if we use self.gcs an exception
        # is thrown -> Wrong SSL Version.
        gcs = storage.Client(self.project.project_id)

        # task.id is the uuid generated name for the bucket
        bucket = gcs.get_bucket(task.id)
        # Close the tar file
        task.tar_file.close()

        blob = storage.Blob(os.path.basename(task.file), bucket)
        with open(task.file, 'rb') as file:
            blob.upload_from_file(file)
Example 20
    def upload(self, fnames):
        uploaded_files = []

        bucket = self.storage.get_bucket(config.GCS_PROCESSED_PHOTOS_BUCKET)
        batch = self.datastore.batch()

        for fname in fnames:
            name, ext = os.path.splitext(fname)
            fpath = '{0}/{1}{2}'.format(constants.IMAGE_PROCESSOR_DATA_DIR,
                                        name, ext)
            objname = '{0}{1}'.format(name, ext)
            blob = storage.Blob(objname, bucket)
            try:
                # Use a context manager so the file handle is always closed.
                with open(fpath, 'rb') as fp:
                    blob.upload_from_file(fp)
                uploaded_files.append(fname)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(fname))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(fname, e))
            else:
                # Update original photo entity
                photo_key = self.datastore.key(ds.DATASTORE_PHOTO, fname)
                photo_entity = self.datastore.get(photo_key)
                photo_entity.update({'processed': True})
                batch.put(photo_entity)

                # Create datastore entry for oriented image
                name, ext = os.path.splitext(fname)
                resized_fname = '{0}{1}'.format(name, ext)
                oriented_key = self.datastore.key(ds.DATASTORE_ORIENTED_IMAGE,
                                                  resized_fname)
                oriented_entity = datastore.Entity(oriented_key)
                oriented_entity['original_photo'] = photo_key
                oriented_entity['image_type'] = unicode(ds.TOTALITY_IMAGE_TYPE)
                oriented_entity[
                    ds.TOTALITY_ORDERING_PROPERTY] = random.random()
                batch.put(oriented_entity)
Example 21
    def upload(self, fnames):
        """
        Uploads a list of Movie entities to the datastore and uploads the
        corresponding movie files to Cloud Storage.
        """

        # Name movies based on time created
        movie_dir = datetime.now().strftime("%Y-%m-%d %H:%M")
        movie_name = 'movie-{0}.mp4'.format(movie_dir)

        # Upload movie to Cloud Storage
        bucket = self.storage.get_bucket(config.GCS_MOVIE_BUCKET)
        blob = storage.Blob('{0}/{1}'.format(movie_dir, movie_name), bucket)

        # Open in binary mode: the movie file is not text.
        with open(constants.MOVIE_FPATH, 'rb') as f:
            try:
                blob.upload_from_file(f)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(constants.MOVIE_FPATH))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(constants.MOVIE_FPATH, e))
                return False
Example 22
def maybe_upload_file_to_bucket(filename,
                                basename='',
                                client_obj=None,
                                bucket_prefix=None,
                                bucket_suffix=None):

    client_obj = maybe_populate_client_obj(client_obj)
    if client_obj is None:
        return

    bucket_name = lookup_bucket(client_obj,
                                prefix=bucket_prefix,
                                suffix=bucket_suffix)
    if bucket_name is None:
        return

    bucket = client_obj.get_bucket(bucket_name)
    if bucket.get_blob(filename) is None:
        blob = storage.Blob(filename, bucket)
        with open(path.join(basename, filename), 'rb') as fp:
            blob.upload_from_file(fp)

    return client_obj
Example 23
def test_retry_on_normal_error(b, collect):
    """Ensure retries are processed for most errors."""
    blob_name = 'test-blob-name'
    k = storage.Blob(blob_name, b)

    collect.inject(Exception('Normal error'))
    d = gs_deleter.Deleter()
    d.delete(k)

    # Since delete_blob will fail over and over again, aborted_blobs
    # should grow quickly.
    while len(collect.aborted_blobs) < 2:
        gevent.sleep(0.1)

    # Since delete_blob has been failing repeatedly, no blobs should
    # be successfully deleted.
    assert not collect.deleted_blobs

    # Turn off fault injection and flush/synchronize with close().
    collect.inject(None)
    d.close()

    # The one enqueued job should have been processed.
    assert collect.deleted_blobs == [blob_name]
Example 24
connection = storage.get_connection()

# OK, now let's look at all of the buckets...
print(list(demo.list_buckets(connection)))  # This might take a second...

# Now let's create a new bucket...
bucket_name = ("bucket-%s" % time.time()).replace(".", "")  # Get rid of dots.
print(bucket_name)
bucket = demo.create_bucket(bucket_name, connection)
print(bucket)

# Let's look at all of the buckets again...
print(list(demo.list_buckets(connection)))

# How about we create a new blob inside this bucket.
blob = storage.Blob("my-new-file.txt", bucket=bucket)

# Now let's put some data in there.
blob.upload_from_string("this is some data!")

# ... and we can read that data back again.
print(blob.download_as_string())

# Now let's delete that blob.
print(blob.delete())

# And now that we're done, let's delete that bucket...
print(bucket.delete())

# Alright! That's all!
# Here's an interactive prompt for you now...
Example 25
class Pipeline():
    def __init__(self, datastore_client, storage_client):
        self.prev_fnames = []
        self.datastore = datastore_client
        self.storage = storage_client

    def scan(self):
        """
        Scans datastore for all <kind> entities. A list of all
        entity names is returned.
        """

        # Query datastore for all full-disk OrientedImage entities, sorted by the
        # adjusted timestamp of the image (its ordering in the megamovie).
        query = self.datastore.query(
            kind=ds.DATASTORE_ORIENTED_IMAGE,
            order=[ds.TOTALITY_ORDERING_PROPERTY],
            filters=[("image_type", "=", ds.TOTALITY_IMAGE_TYPE)])

        # Fetch keys only, no need for other entity properties
        query.keys_only()

        # Retrieve all datastore entities. The query currently has no limit
        # and fetches all full-disk totality images.
        try:
            query = query.fetch()
        except Exception:
            msg = 'Failed to get {0} from Cloud Datastore.'
            logging.exception(msg.format(query))
            return None

        fnames = [entity.key.name for entity in query]

        if self.prev_fnames == fnames:
            return []

        self.prev_fnames = fnames

        # Return list of filenames
        return fnames

    def assemble(self, fnames):
        """
        Stitches together a movie from an ordered list of filenames.
        Downloads new files from GCS, then feeds them to ffmpeg.
        Returns the list of files successfully stitched into the movie
        and calls the stats function.
        """

        # Get files from GCS
        pool = Pool(min(len(fnames), constants.MOVIE_DAEMON_MAX_PROCESSES))
        results = pool.map(get_file_from_gcs, fnames)
        pool.terminate()

        # Start ffmpeg subprocess
        ffmpeg_cmd = [
            "ffmpeg",
            "-y",  # Overwrite exsisting movie file
            "-f",
            "image2pipe",
            "-framerate",
            constants.MOVIE_FRAMERATE,
            "-vcodec",
            "mjpeg",
            "-i",
            "-",  # Input pipe from stdin
            "-vf",
            "scale=1024:-1",
            "-loglevel",
            "panic",
            "-vcodec",
            "libx264",
            constants.MOVIE_FPATH
        ]

        ffmpeg_ps = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)

        fnames = list(compress(fnames, results))
        files_read = self._pipe_to_ffmpeg(ffmpeg_ps, fnames)

        if files_read > constants.MOVIE_MIN_FRAMES:
            ffmpeg_ps.stdin.close()
            ffmpeg_ps.wait()
        else:
            ffmpeg_ps.kill()

        return fnames

    def upload(self, fnames):
        """
        Uploads a list of Movie entities to the datastore and uploads the
        corresponding movie files to Cloud Storage.
        """

        # Name movies based on time created
        movie_dir = datetime.now().strftime("%Y-%m-%d %H:%M")
        movie_name = 'movie-{0}.mp4'.format(movie_dir)

        # Upload movie to Cloud Storage
        bucket = self.storage.get_bucket(config.GCS_MOVIE_BUCKET)
        blob = storage.Blob('{0}/{1}'.format(movie_dir, movie_name), bucket)

        # Open in binary mode: the movie file is not text.
        with open(constants.MOVIE_FPATH, 'rb') as f:
            try:
                blob.upload_from_file(f)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(constants.MOVIE_FPATH))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(constants.MOVIE_FPATH, e))
                return False

        if os.path.exists(constants.C_MAP_FPATH):
            map_name = 'map-{0}.png'.format(movie_dir)
            blob = storage.Blob('{0}/{1}'.format(movie_dir, map_name), bucket)

            # Open in binary mode: the map image is not text.
            with open(constants.C_MAP_FPATH, 'rb') as c_map:
                try:
                    blob.upload_from_file(c_map)
                    msg = 'Successfully uploaded {0} to Cloud Storage'
                    logging.info(msg.format(map_name))
                except Exception as e:
                    msg = 'Failed to upload {0} to Cloud Storage: {1}'
                    logging.error(msg.format(constants.C_MAP_FPATH, e))