def post(self, news_id):
    news = g.News.objects.with_id(news_id)
    if not current_user.is_admin() and not current_user.has_any_permission('club', news.club.id, ['admin', 'news']):
        return abort(401)

    parser = reqparse.RequestParser()
    parser.add_argument('media', type=werkzeug.datastructures.FileStorage, location='files')
    args = parser.parse_args()

    uid = str(uuid4())
    bucket = s3conn.get_bucket(current_app.config['AWS_S3_BUCKET'])
    key = Key(bucket)
    key.key = g.tenant + '/news/' + str(news.id) + '/' + uid
    key.content_type = args['media'].mimetype
    key.set_contents_from_file(args['media'].stream)
    key.make_public()

    news.update(add_to_set__medias=Media(
        name=uid,
        url='https://' + current_app.config['AWS_S3_BUCKET'] + '.s3.amazonaws.com/'
            + g.tenant + '/news/' + str(news.id) + '/' + uid
    ))
    return g.News.objects.with_id(news_id)
def save_file_to_s3(operation, params):
    if not waffle.switch_is_active('enable_s3'):
        print "S3 uploads are disabled"
        return ("complete", "S3 uploads temporarily disabled")
    statsd.incr("save_file_to_s3")
    conn = boto.connect_s3(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(settings.AWS_S3_UPLOAD_BUCKET)
    k = Key(bucket)
    # make a YYYY/MM/DD directory to put the file in
    source_file = open(params['tmpfilename'], "rb")
    n = datetime.now()
    key = "%04d/%02d/%02d/%s" % (
        n.year, n.month, n.day,
        os.path.basename(params['tmpfilename']))
    k.key = key
    k.set_contents_from_file(source_file)
    source_file.close()
    f = File.objects.create(video=operation.video, url="", cap=key,
                            location_type="s3",
                            filename=params['filename'],
                            label="uploaded source file (S3)")
    OperationFile.objects.create(operation=operation, file=f)
    return ("complete", "")
def _upload_file_to_s3(self, bucket_name, keyname, filename, s3_key, s3_secret,
                       s3_url, canned_acl='aws-exec-read'):
    if not has_euca:
        raise Exception("Euca2ools missing.. Required to run this function")
    s3euca = Euca2ool(is_s3=True)
    s3euca.ec2_user_access_key = s3_key
    s3euca.ec2_user_secret_key = s3_secret
    s3euca.url = s3_url

    conn = s3euca.make_connection()
    bucket_instance = _ensure_bucket(conn, bucket_name, canned_acl)
    k = Key(bucket_instance)
    k.key = keyname
    with open(filename, "rb") as the_file:
        try:
            logger.debug("Uploading File:%s to bucket:%s // key:%s" %
                         (filename, bucket_name, keyname))
            k.set_contents_from_file(the_file, policy=canned_acl)
            logger.debug("File Upload complete")
        except S3ResponseError, s3error:
            s3error_string = '%s' % (s3error)
            if s3error_string.find("403") >= 0:
                logger.exception("Permission denied while writing : %s\n%s"
                                 % (k.key, s3error))
def save_image_to_s3(filename):
    with open(os.path.join('/static/temp/', filename), 'r') as img:
        conn = S3Connection(current_app.config['AWS_ACCESS_KEY'],
                            current_app.config['AWS_SECRET_KEY'])
        b = conn.create_bucket(current_app.config.get('AWS_BUCKET_NAME'))
        k = Key(b)
        k.set_contents_from_file(img)
        return k.key
def upload(self, data, key, content_type, headers=None, public=True):
    '''Uploads a file to S3 as the given key.

    :param data: the file data
    :type data: a file-like object or a :class:`str`
    :param str key: the name associated with the file (usually looks like a
                    path).
    :param str content_type: The MIME type of the data.
    :param headers: Any extra headers associated with the file that will be
                    sent any time the file is accessed.
    :type headers: :class:`dict` or :const:`None`

    :returns: the protocol-agnostic URL of the new file on S3.
    :rtype: :class:`str`
    '''
    if not headers:
        headers = {}
    headers.update({
        'Content-Type': content_type,
    })
    key = Key(self.bucket, key)
    if hasattr(data, 'read'):
        key.set_contents_from_file(data, headers=headers)
    else:
        key.set_contents_from_string(data, headers=headers)
    if public:
        key.set_acl('public-read')
    return '//%s.s3.amazonaws.com/%s' % (self.bucket_name, key.name)
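# Hedged usage sketch (not from the original source): it assumes `store` is an
# instance of the class that defines upload() above, i.e. its constructor has
# already set self.bucket (a boto Bucket) and self.bucket_name, which is all
# the method relies on. The file name, key, and header below are placeholders.
with open('logo.png', 'rb') as fh:
    url = store.upload(fh, 'images/logo.png', 'image/png',
                       headers={'Cache-Control': 'max-age=86400'}, public=True)
print 'https:' + url  # upload() returns a protocol-agnostic //bucket... URL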
def upload_stache():
    import base64
    import tempfile
    import boto
    from boto.s3.key import Key
    import re

    dataUrlPattern = re.compile('data:image/(png|jpeg);base64,(.*)$')
    conn = boto.connect_s3(app.config['AWS_KEY'], app.config['AWS_SECRET'])
    song_id = request.values.get('song_id')
    imgb64 = dataUrlPattern.match(request.values.get('stache')).group(2)
    data = base64.b64decode(imgb64)

    fp = tempfile.NamedTemporaryFile()
    # fp = open(song_id, 'w')
    fp.write(data)

    bucket = conn.get_bucket('staches')
    headers = {'Content-Type': 'image/png'}
    k = Key(bucket)
    k.key = "%s.png" % (song_id)
    k.set_contents_from_file(fp, headers=headers)
    k.set_acl('public-read')
    fp.close()

    r = get_redis()
    key = 'cache:data:%s' % (song_id)
    song = json.loads(r.get(key))
    song['s3_url'] = "http://staches.s3.amazonaws.com/%s" % k.key
    song['stache_version'] = '0.1'
    r.set(key, json.dumps(song))
    return song['s3_url']
def upload_parts(self, bucket_instance, directory, parts, part_to_start_from,
                 canned_acl=None, upload_policy=None,
                 upload_policy_signature=None):
    if part_to_start_from:
        okay_to_upload = False
    else:
        okay_to_upload = True

    headers = {}
    if upload_policy:
        headers['S3UploadPolicy'] = upload_policy
    if upload_policy_signature:
        headers['S3UploadPolicySignature'] = upload_policy_signature

    for part in parts:
        if part == part_to_start_from:
            okay_to_upload = True
        if okay_to_upload:
            print 'Uploading part:', part
            k = Key(bucket_instance)
            k.key = part
            part_file = open(os.path.join(directory, part), 'rb')
            try:
                k.set_contents_from_file(part_file, policy=canned_acl,
                                         headers=headers)
            except S3ResponseError, s3error:
                s3error_string = '%s' % s3error
                if s3error_string.find('403') >= 0:
                    msg = 'Permission denied while writing:', k.key
                else:
                    msg = s3error_string
                self.display_error_and_exit(msg)
def admin():
    form_upload = PhotoUpload()
    """PhotoProject(projectKey="noviprojekat", name="Novi projekat",
    description="opis drugog projekta", publish=False, placeNumber=2).save()"""
    listOfProjects = PhotoProject.objects
    print listOfProjects
    if form_upload.validate_on_submit():
        # extracting photo related data from request
        photoName = form_upload.photoName.data
        uploadedphoto = request.files['photo']
        projectkey = request.form['hiddenkey']
        placeNumber = len(PhotoProject.objects.get(projectKey=projectkey).photos) + 1

        # setting key and data for amazon S3
        keyname = projectkey + "/" + photoName.replace(" ", "").lower()
        key = Key(bucket)
        key.key = keyname
        key.set_contents_from_file(uploadedphoto)

        # creating new Photo instance and adding it to parent PhotoProject
        newphoto = Photo(photoKey=photoName.replace(" ", "").lower(),
                         name=photoName, placeNumber=placeNumber)
        photoproject = PhotoProject.objects.get(projectKey=projectkey)
        photoproject.photos.append(newphoto)
        photoproject.save()

    return render_template('photoupload.html', form=form_upload,
                           listOfProjects=listOfProjects)
def index(pin):
    s3_conn = S3Connection(AWS_KEY, AWS_SECRET)
    bucket = s3_conn.get_bucket('property-image-cache')
    s3_key = Key(bucket)
    s3_key.key = '{0}.jpg'.format(pin)

    if s3_key.exists():
        output = BytesIO()
        s3_key.get_contents_to_file(output)
    else:
        image_viewer = 'http://www.cookcountyassessor.com/PropertyImage.aspx?pin={0}'
        image_url = image_viewer.format(pin)
        image = requests.get(image_url)
        print(image.headers)
        if 'image/jpeg' in image.headers['Content-Type']:
            output = BytesIO(image.content)
            s3_key.set_metadata('Content-Type', 'image/jpg')
            s3_key.set_contents_from_file(output)
            s3_key.set_acl('public-read')
        else:
            sentry.captureMessage('Could not find image for PIN %s' % pin)
            abort(404)

    output.seek(0)
    response = make_response(output.read())
    response.headers['Content-Type'] = 'image/jpg'
    return response
def run_file_capture(file, sessionData):
    result = ''
    viprOnline = sessionData['viprOnline']
    if viprOnline == 'True':
        print 'initializing ViPR system'
        # Config info to find ViPR in the vLab
        s3secret = sessionData['s3secret']
        s3user = sessionData['s3user']
        s3host = sessionData['s3host']
        s3port = int(sessionData['s3port'])
        s3bucket = sessionData['s3bucket']
        print s3secret
        print s3user
        print s3host
        conn = S3Connection(aws_access_key_id=s3user,
                            aws_secret_access_key=s3secret,
                            host=s3host,
                            port=s3port,
                            calling_format='boto.s3.connection.ProtocolIndependentOrdinaryCallingFormat',
                            is_secure=False)
        print 'Listing all buckets for this user'
        print conn.get_all_buckets()
        mybucket = conn.get_bucket(s3bucket)
        mykey = Key(mybucket)
        mykey.key = '/user/hadoop/input/' + file.filename
        mykey.set_contents_from_file(file)
    result = Markup(result)
    return result
def upload():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            now = datetime.now()
            # Naming and storage to S3 database
            prefix = file.filename.rsplit('.', 1)[0]
            conn = S3Connection(ACCESS_KEY, SECRET_KEY)
            bkt = conn.get_bucket('chestcad')
            k = Key(bkt)
            k.key = prefix
            if istiff(file.filename):
                k.set_contents_from_file(file, headers={"Content-Type": "image/tiff"})
            elif isjpg(file.filename):
                k.set_contents_from_file(file, headers={"Content-Type": "image/jpeg"})
            elif ispng(file.filename):
                k.set_contents_from_file(file, headers={"Content-Type": "image/png"})
            elif isdicom(file.filename):
                ds = dicom.read_file(file)
                pil_dcm = get_dicom_PIL(ds)
                pil_dcm_str = cStringIO.StringIO()
                pil_dcm.save(pil_dcm_str, format='tiff')
                pil_dcm_str.seek(0)
                k.set_contents_from_file(pil_dcm_str, headers={"Content-Type": "image/tiff"})
            else:
                k.set_contents_from_file(file)  # don't suspect that this will work
            return jsonify({"success": True, "file": file.filename})  # passes to upload.js, function uploadFinished
def _send_file_to_s3(self, description, file_meta_data, signal):
    key = Key(self._bucket, file_meta_data.getKeyName())
    key.key = file_meta_data.getKeyName()
    key.set_contents_from_file(signal)
    file_meta_data.setDesc(description)
    print "Sending file {} to s3 with description {}".format(
        file_meta_data.getKeyName(), file_meta_data.getDesc())
def upload():
    print "Upload function"
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            # now = datetime.now()
            sfile = file.filename.rsplit('.', 1)[0].split('-')
            exdel = int(sfile[-1])
            vwdel = int(sfile[-2])
            conn = S3Connection(ACCESS_KEY, SECRET_KEY)
            bkt = conn.get_bucket('rayvids')
            k = Key(bkt)
            ctime = datetime.now()
            keystr = ctime.strftime('%f')
            if exdel > 0:
                keystr = 'tmp/' + keystr
            if vwdel > 0:
                keystr = keystr + '_'
            k.key = keystr + '.mp4'
            k.set_contents_from_file(file)
            bkt.set_acl('public-read', k.key)
            url = URL_PREFIX + keystr + '.mp4#agreement'
            return jsonify({"success": True, "url": url})
def handle_profile_image(user, imageFile):
    key = Key(bucket)
    key.key = 'userid_' + user.id + '_' + str(int(time.time())) + '.png'
    key.set_contents_from_file(imageFile)
    profileImage = ProfileImage.objects.create(user=user, imageURL=key.key)
    profileImage.save()
    return profileImage.imageURL
def bucket_create(self, key, val, metadata_dict={}):
    '''
    Create an object in the bucket, but only if not yet present (save traffic).

    Parameters
    ----------
    key : str
    val : file-like object
    metadata_dict : dict

    Returns
    -------
    Key
    '''
    s3_key = Key(self.apk_bucket)
    s3_key.key = key
    # important: set metadata before actual upload
    s3_key.metadata = metadata_dict
    s3_key.content_type = 'application/vnd.android.package-archive'
    # upload
    log.debug("uploading %s", s3_key.key)
    s3_key.set_contents_from_file(val, replace=False)
    return s3_key
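# Hedged usage sketch (not from the original source): bucket_create() above
# only needs self.apk_bucket to be a boto Bucket, and it uploads with
# replace=False, so re-running it will not overwrite an existing key. The
# instance name `store`, the key, and the metadata below are placeholders.
with open('app-release.apk', 'rb') as apk:
    store.bucket_create('com.example.app/1.0.apk', apk,
                        metadata_dict={'version_name': '1.0'})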
def backup(filename, **kwargs):
    log = kwargs.get("logger", app_logger)
    conf = kwargs.get("conf", None)
    bucket = get_bucket(conf)
    if not bucket:
        return

    log.info("Backing up " + filename)
    arcname = filename.split("/")[-1]

    out = StringIO()
    with tarfile.open(fileobj=out, mode="w:gz") as tar:
        tar.add(filename, arcname=arcname)

    password = kwargs.get("password")
    if not password:
        password = getpass()

    encrypted_out = StringIO()
    encrypt(out, encrypted_out, password)
    encrypted_out.seek(0)

    k = Key(bucket)
    k.key = arcname + datetime.now().strftime("%Y%m%d") + ".tgz.enc"
    k.set_contents_from_file(encrypted_out)
    k.set_acl("private")
def log_photo():
    """ Endpoint for recording photos taken on the client. """
    if not g.authorized:
        return access_denied()

    if not 'photo' in request.files:
        return api_error('no photo attached')
    f = request.files['photo']
    ext = f.filename.rsplit('.', 1)[1]
    context = request.form['context'] if 'context' in request.form else None

    # Insert the photo into the database.
    cur = get_db().cursor()
    cur.execute('INSERT INTO logged_photos (aid, ext, context) VALUES(%s, %s, %s) RETURNING aid',
                (g.account.aid, ext, context))
    pid = cur.fetchone()[0]

    # Send the image contents to S3
    conn = S3Connection(current_app.config['AWS_ACCESS_KEY'],
                        current_app.config['AWS_SECRET_KEY'])
    bucket = conn.get_bucket(current_app.config['S3_BUCKET'])
    k = Key(bucket)
    k.key = str(pid)
    k.set_contents_from_file(f)

    # Mark the file as saved
    cur = get_db().cursor()
    cur.execute('UPDATE logged_photos SET saved = %s WHERE pid = %s', (True, pid))

    return jsonify(status='ok')
def deprecated__handle_truth(self, rs):
    if self._mask is None:
        self._mask = rs.get_mask()
    rs.set_mask(self._mask)
    accuracy = rs.accuracy()
    with tempfile.SpooledTemporaryFile() as temp:
        np.save(temp, accuracy)
        temp.seek(0)
        conn = boto.connect_s3()
        bucket = conn.create_bucket(self.s3_results)
        k = Key(bucket)
        m = hashlib.md5()
        m.update(accuracy)
        md5 = m.hexdigest()
        k.key = md5
        k.set_contents_from_file(temp)
    run_id = rs.get_run_id()
    try:
        item = Item(self.truth_table, {'run_id': run_id,
                                       'strain_id': rs.spec_string})
        item['accuracy_file'] = md5
        item['result_files'] = base64.b64encode(json.dumps(rs.get_result_files()))
        item['bucket'] = self.s3_results
        item['timestamp'] = datetime.datetime.utcnow().strftime('%Y.%m.%d-%H:%M:%S')
        item.save()
    except ConditionalCheckFailedException as ccfe:
        print "*" * 20
        print ccfe
        if rs is not None:
            print {'run_id': run_id, 'strain_id': rs.spec_string}
            print rs.get_result_files()
def _upload_to_s3(file_to_upload, path, name):
    '''
    Upload file to S3 using provided keyname.

    Returns:
        public_url: URL to access uploaded file
    '''
    if settings.S3_HOST is None:
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
    else:
        conn = S3Connection(settings.AWS_ACCESS_KEY_ID,
                            settings.AWS_SECRET_ACCESS_KEY,
                            host=settings.S3_HOST,
                            calling_format=OrdinaryCallingFormat())

    bucketname = settings.S3_BUCKET
    bucket = conn.create_bucket(bucketname)

    prefix = getattr(settings, 'S3_PATH_PREFIX')
    path = '{0}/{1}'.format(prefix, path)

    k = Key(bucket)
    k.key = '{path}/{name}'.format(path=path, name=name)
    public_url = k.generate_url(60 * 60 * 24 * 365)  # URL timeout in seconds.
    k.set_metadata('filename', file_to_upload.name)
    k.set_contents_from_file(file_to_upload)
    return public_url
def push_picture_to_s3(source, id):
    try:
        import boto
        from boto.s3.key import Key
        # set boto lib debug to critical
        bucket_name = settings.BUCKET_NAME
        print(bucket_name + 'worked')
        # connect to the bucket
        conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucket = conn.get_bucket(bucket_name)
        print(conn)
        print(settings.AWS_ACCESS_KEY_ID)
        # go through each version of the file
        key = 'user-%s.png' % id
        print(key)
        # fn = '/var/www/data/%s.png' % id
        # create a key to keep track of our file in the storage
        k = Key(bucket)
        k.key = key
        k.set_contents_from_file(source)
        # we need to make it public so it can be accessed publicly
        # using a URL like http://s3.amazonaws.com/bucket_name/key
        k.make_public()
        # remove the file from the web server
    except:
        print('error')
        pass
def put(self, filename, handle):
    """
    Upload a distribution archive to the configured Amazon S3 bucket.

    If the :attr:`~.Config.s3_cache_readonly` configuration option is
    enabled this method does nothing.

    :param filename: The filename of the distribution archive (a string).
    :param handle: A file-like object that provides access to the
                   distribution archive.
    :raises: :exc:`.CacheBackendError` when any underlying method fails.
    """
    if self.config.s3_cache_readonly:
        logger.info('Skipping upload to S3 bucket (using S3 in read only mode).')
    else:
        timer = Timer()
        self.check_prerequisites()
        with PatchedBotoConfig():
            from boto.s3.key import Key
            raw_key = self.get_cache_key(filename)
            logger.info("Uploading distribution archive to S3 bucket: %s", raw_key)
            key = Key(self.s3_bucket)
            key.key = raw_key
            try:
                key.set_contents_from_file(handle)
            except Exception as e:
                logger.info("Encountered error writing to S3 bucket, "
                            "falling back to read only mode (exception: %s)", e)
                self.config.s3_cache_readonly = True
            else:
                logger.info("Finished uploading distribution archive to S3 bucket in %s.", timer)
def add_issue():
    if request.method == 'POST':
        u = User.get_or_create(db.session, int(request.form['user']))
        if not u.can_publish:
            return "", 403
        date = dateutil.parser.parse(request.form['time'])
        date = date.astimezone(pytz.utc).replace(tzinfo=None)
        issue = Issue(reporter=u, title=request.form['title'],
                      time=date,
                      description=request.form['description'],
                      urgency=int(request.form.get('urgency', 0)))
        db.session.add(issue)
        db.session.commit()

        pictures = request.files.getlist("pictures[]")
        # TODO: check extension
        for picture in pictures:
            k = Key(boto_bucket)
            k.set_contents_from_file(picture.stream)
            k.make_public()
            p = Picture(issue=issue, s3_name=k.name)
            db.session.add(p)
        db.session.commit()
        return ""
    else:
        return """
def store_fileobject(self, fileobject, path, interrupt_event=None):
    size = self.__get_size(fileobject)
    self.logger.info("Storing file object of size %s to %s", size, path)
    k = Key(self.bucket)
    k.key = path[1:]
    k.set_contents_from_file(fileobject)  # does not return bytes written
    return int(time.time())
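# Hedged usage sketch (not from the original source): store_fileobject() strips
# the leading "/" from the path before using it as the S3 key, so callers pass
# absolute-style paths; `store` is assumed to be an instance exposing a boto
# Bucket as self.bucket and a logger as self.logger.
with open('notes.txt', 'rb') as fobj:
    uploaded_at = store.store_fileobject(fobj, '/documents/notes.txt')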
def upload_image(image_url, image_name):
    """Upload an image."""
    try:
        # connect to the S3 bucket
        connection = boto.connect_s3()
        bucket = connection.get_bucket(config.AWS_STORAGE_BUCKET_NAME)
        key = Key(bucket)
        # assign the file name
        key.key = str(int(time())) + "-" + image_name + ".png"
        # read
        file_object = urllib2.urlopen(image_url)
        file_data = StringIO.StringIO(file_object.read())
        # write
        key.content_type = "image/png"
        key.set_contents_from_file(file_data)
        # make it publicly readable
        key.make_public()
        result_url = key.generate_url(0, expires_in_absolute=True,
                                      force_http=True, query_auth=False)
        return result_url
    except Exception, e:
        return e
def add_s3_file(filename, public=True, bucket='averrin'):
    b = get_bucket(bucket)
    k = Key(b)
    k.key = os.path.basename(filename)
    k.set_contents_from_file(file(filename, 'r'))
    if eval(str(public)):
        k.set_acl('public-read')
def _download_emails(self):
    """
    Downloads emails to the specified folder from a file containing a new link on each line.

    :return: None, downloads files to folder.
    """
    lines = open(self.__pdf_list_file, 'r').readlines()
    for idx, url in enumerate(lines):
        url_split = url.split("/")
        f = urllib2.urlopen(url)
        data = f.read()
        if self.__use_s3:
            bucket = self.__s3.get_bucket(self.__where_to_download.split("/")[0])
            k = Key(bucket)
            # join the remaining path components back into the key prefix
            where = "/".join(self.__where_to_download.split("/")[1:]) + "/"
            name = url_split[-1].split(".")[0] + ".pdf"
            with open("temp/" + name, "wb") as pdf:
                pdf.write(data)
                k.key = where + name
                k.set_contents_from_file(pdf)
        else:
            with open(os.path.join(self.__where_to_download,
                                   url_split[-1].split(".")[0] + ".pdf"), "wb") as pdf:
                pdf.write(data)
        print("\r" + str(float(idx) / len(lines) * 100) + "% done with download."),
def _write_string_to_s3(key_path, str):
    conn = boto.connect_s3()
    bucket = conn.get_bucket('el.epton.org')
    k = Key(bucket)
    k.key = key_path
    k.set_contents_from_file(StringIO.StringIO(str))
    k.make_public()
def submit():
    if len(request.files) < 1:
        return 'OK'

    name = request.form.get('name')
    if not name:
        abort(405)

    root = '%s/' % name
    for key in request.files:
        file = request.files[key]
        key = '%s%s' % (root, file.filename)
        key = Key(bucket=app.bucket, name=key)
        size = file.content_length
        headers = None
        (mimetype, encoding) = mimetypes.guess_type(file.filename)
        if encoding == 'gzip':
            mimetype = 'application/x-gzip'
        if mimetype:
            headers = {'Content-Type': mimetype}
        key.set_contents_from_file(file.stream, size=size, headers=headers)
        file.close()
        key.close()
    return 'OK'
def process_photo(photo, record):
    if record.source in ("android", "iphone"):
        image = Image.open(photo)
        if image.size[0] > image.size[1]:
            temp = image.rotate(-90, Image.BILINEAR, expand=True)
            image = cStringIO.StringIO()
            temp.save(image, "jpeg")
    else:
        image = photo

    headers = {
        "Content-Type": "image/jpeg",
        "Expires": "%s GMT" % (
            email.Utils.formatdate(
                time.mktime((datetime.datetime.now() +
                             datetime.timedelta(days=365 * 2)).timetuple())
            )
        ),
        "Cache-Control": "public, max-age=%d" % (3600 * 24 * 365 * 2),
    }

    conn = S3Connection(settings.S3_CREDENTIALS["access_key"],
                        settings.S3_CREDENTIALS["secret_key"])
    bucket = conn.get_bucket(settings.S3_BUCKET)

    photo_filename = "%s/photo.jpg" % record._id
    key = Key(bucket=bucket, name=photo_filename)
    key.set_contents_from_file(image, headers=headers)
    key.set_acl("public-read")

    thumbnail_filename = "%s/thumbnail.jpg" % record._id
    key = Key(bucket=bucket, name=thumbnail_filename)
    key.set_contents_from_file(create_thumbnail(image), headers=headers)
    key.set_acl("public-read")

    record.photo_url = "http://%s/%s" % (settings.S3_BUCKET, photo_filename)
    record.thumbnail_url = "http://%s/%s" % (settings.S3_BUCKET, thumbnail_filename)
    record.save()
def _save(self, name, content):
    print name
    print content
    name = name.replace('\\', '/')
    key = Key(self.bucket, name)
    print key
    if hasattr(content, 'temporary_file_path'):
        print content.temporary_file_path()
        #mime = mimetypes.guess_type(content.temporary_file_path())[0]
        #print mime
        #key.key = filename
        #key.set_metadata("Content-Type", mime)
        #key.set_contents_from_filename(content.temporary_file_path())
    elif isinstance(content, File):
        print "key.set_contents_from_file(content)"
        key.set_contents_from_file(content)
        key.set_acl("public-read")
    else:
        print "key.set_contents_from_string(content)"
        #key.set_contents_from_string(content)
        #key.set_acl("public-read")
    return name
def save_file(self, path, file_data):
    key = Key(self.bucket, path)
    key.set_contents_from_file(file_data.stream)
        if e.code == 404:
            return '404 NOT FOUND', False, ''
        raise

    image_data = StringIO(image_file.read())
    content_type = image_file.info()['content-type']
    print 'Download complete: %s' % url

    # connect to S3 and save the resized image to the target bucket
    conn = boto.connect_s3(settings.aws_access_key_id, settings.aws_secret_access_key)
    bucket = conn.get_bucket(settings.target_bucket)
    k = Key(bucket)
    k.key = path
    print 'Saving to S3: %s' % path
    k.content_type = content_type
    k.set_contents_from_file(image_data)
    k.set_acl('public-read')
    print 'Save to S3 complete: %s' % path

    headers = [('Content-type', content_type)]
    return '200 OK', headers, image_data.getvalue()


def app(environ, start_response):
    headers = False
    status, headers, data = cache_image(environ['PATH_INFO'])
    if not headers:
        # default headers in case of error
        headers = [('Content-type', 'text/plain')]
def multipart_upload(src, dest, num_processes=2, split=2, force=False,
                     secure=False, reduced_redundancy=False, verbose=False,
                     quiet=False, max_tries=5):
    # get bucket
    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    try:
        bucket = conn.get_bucket(S3_BUCKET_NAME)
    except S3ResponseError:
        sys.stderr.write("There is no bucket with the name \"" + S3_BUCKET_NAME
                         + "\" in your Amazon S3 account\n")
        sys.stderr.write("Error: Please enter an appropriate bucket name and re-run the script\n")
        src.close()
        return

    # upload file to Amazon S3
    # Determine the splits
    part_size = max(5 * 1024 * 1024, 1024 * 1024 * split)
    src.seek(0, 2)
    size = src.tell()
    num_parts = int(ceil(size / part_size))

    # If file is less than 5M, just upload it directly
    if size < 5 * 1024 * 1024:
        src.seek(0)
        t1 = time.time()
        k = Key(bucket)
        k.key = dest
        k.set_contents_from_file(src)
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        log.info("Finished uploading %0.2fM in %0.2fs (%0.2fMBps)" % (s, t2, s / t2))
        return

    # Create the multi-part upload object
    mpu = bucket.initiate_multipart_upload(dest, reduced_redundancy=reduced_redundancy)
    log.info("Initialized upload: %s" % mpu.id)

    # Generate arguments for invocations of do_part_upload
    def gen_args(num_parts, fold_last):
        for i in range(num_parts + 1):
            part_start = part_size * i
            if i == (num_parts - 1) and fold_last is True:
                yield (conn, bucket.name, mpu.id, src.name, i, part_start,
                       part_size * 2, secure, max_tries, 0)
                break
            else:
                yield (conn, bucket.name, mpu.id, src.name, i, part_start,
                       part_size, secure, max_tries, 0)

    # If the last part is less than 5M, just fold it into the previous part
    fold_last = ((size % part_size) < 5 * 1024 * 1024)

    # Do the thing
    try:
        # Create a pool of workers
        pool = Pool(processes=num_processes)
        t1 = time.time()
        pool.map_async(do_part_upload, gen_args(num_parts, fold_last)).get(9999999)
        # Print out some timings
        t2 = time.time() - t1
        s = size / 1024. / 1024.
        # Finalize
        src.close()
        mpu.complete_upload()
        log.info("Finished uploading %0.2fM in %0.2fs (%0.2fMBps)" % (s, t2, s / t2))
    except KeyboardInterrupt:
        log.warn("Received KeyboardInterrupt, canceling upload")
        pool.terminate()
        mpu.cancel_upload()
    except Exception, err:
        log.error("Encountered an error, canceling upload")
        log.error(err)
        mpu.cancel_upload()
def screenshot_url(self):
    self.flask.logger.debug("SCREENSHOTTER: STARTED")
    s3 = boto.connect_s3()
    bucket = s3.get_bucket(self.config.S3_BUCKET)
    target_width = self.config.IMG_WIDTH
    from product_identifier.models import URLScreenshot
    img_headers = {'Content-Disposition': "inline", "Content-Type": "image/png"}
    while True:
        try:
            data = self.scripts.pop_zset(keys=[SCREENSHOT_URLS_SET])
            if not data:
                gevent.sleep(1)
                continue
            url_id, url = data.split("\t")
            url_id = int(url_id)
            url = url.decode("utf-8")

            driver = webdriver.PhantomJS(self.config.PHANTOM_PATH)
            driver.set_window_size(self.config.PHANTOM_WIDTH, self.config.PHANTOM_HEIGHT)
            driver.get(url)
            gevent.sleep(5)
            img_bytes = BytesIO(driver.get_screenshot_as_png())
            driver.quit()

            img = Image.open(img_bytes).convert(mode="LA")
            factor = img.width / float(target_width)
            target_height = int(img.height / factor)
            img = img.resize((target_width, target_height))

            file_like = BytesIO()
            img.save(file_like, format="png")
            file_like.seek(0)  # rewind so the hash covers the PNG bytes just written
            out_bytes = file_like.read()
            file_like.seek(0)
            hsh = hashlib.sha1()
            hsh.update(out_bytes)
            digest = hsh.hexdigest()

            key_name = os.path.join(self.config.S3_KEY_PREFIX, "{}.png".format(digest))
            key = Key(bucket)
            key.name = key_name
            key.set_contents_from_file(file_like, headers=img_headers)
            key.set_acl("public-read")
            img_url = key.generate_url(expires_in=0, query_auth=False)
            self.flask.logger.debug("SCREENSHOTTER: S3_OBJECT_URL {}".format(img_url))

            sshot = URLScreenshot()
            sshot.url_id = url_id
            sshot.img_url = img_url
            try:
                self.db.session.add(sshot)
                self.db.session.commit()
            except:
                self.db.session.rollback()
                raise
        except:
            error = traceback.format_exc()
            self.flask.logger.error(error)
            gevent.sleep(0.25)
def upload_to_s3(bucket):
    fname = '/home/anuj/Pictures/test/hp.jpg'
    basename = os.path.basename(fname)
    key = Key(bucket, basename)
    with open(fname, 'rb') as f:
        key.set_contents_from_file(f)
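# Hedged usage sketch (not from the original source): upload_to_s3() above only
# needs a boto Bucket object, so a caller might obtain one like this. The
# bucket name is a placeholder and credentials are taken from the environment
# or the boto config file, which is boto.connect_s3()'s default behaviour.
import boto

conn = boto.connect_s3()
bucket = conn.get_bucket('example-bucket')
upload_to_s3(bucket)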
#!/usr/bin/python3
import boto
import requests
import tempfile

from boto.s3.key import Key

conn = boto.s3.connect_to_region('eu-central-1',
                                 aws_access_key_id='[redacted_access_key]',
                                 aws_secret_access_key='[redacted_secret]')
bucket = conn.lookup('[redacted_bucket]', validate=False)

chunk_size = 1024
r = requests.get('[redacted_url]')
fileTemp = tempfile.NamedTemporaryFile(delete=True)
for chunk in r.iter_content(chunk_size):
    fileTemp.write(chunk)
fileTemp.seek(0)

k = Key(bucket)
k.key = '/media/rfi/foobar'
k.set_contents_from_file(fileTemp)
fileTemp.close()
def upload_to_s3(aws_access_key_id, aws_secret_access_key, fname, bucket, key,
                 callback=None, md5=None, reduced_redundancy=False,
                 content_type=None, host='s3.eu-central-1.amazonaws.com'):
    """
    XXX copied from somewhere on stackoverflow. Hope to find it again.

    Uploads the given file to the AWS S3 bucket and key specified.

    callback is a function of the form:

        def callback(complete, total)

    The callback should accept two integer parameters, the first
    representing the number of bytes that have been successfully
    transmitted to S3 and the second representing the size of the to be
    transmitted object.

    Returns boolean indicating success/failure of upload.
    """
    switch_validation = False
    if host is not None:
        if 'eu-central' in host:
            switch_validation = True
            os.environ['S3_USE_SIGV4'] = 'True'
    com = boto.connect_s3(aws_access_key_id, aws_secret_access_key, host=host)
    bucket = com.get_bucket(bucket, validate=True)
    s3_key = Key(bucket)
    s3_key.key = key
    if content_type:
        s3_key.set_metadata('Content-Type', content_type)

    with open(fname) as fid:
        try:
            # stat the open file handle, not the filename string
            size = os.fstat(fid.fileno()).st_size
        except:
            # Not all file objects implement fileno(),
            # so we fall back on this
            fid.seek(0, os.SEEK_END)
            size = fid.tell()
        sent = s3_key.set_contents_from_file(
            fid, cb=callback, md5=md5,
            reduced_redundancy=reduced_redundancy, rewind=True)
        # Rewind for later use
        fid.seek(0)

    if switch_validation:
        del os.environ['S3_USE_SIGV4']

    if sent == size:
        return True
    return False
k.key = "thumbnails/%sx%s/%s" % (size[0], size[1], path) k.set_contents_from_string(output.getvalue()) k.make_public() output.close() # save original img if image is None and url: fd = urllib.request.urlopen(url) image = StringIO(fd.read()) else: image = StringIO(image) k.key = path k.set_contents_from_file(image) k.make_public() # make thumbnails if make_thumbnails: make_thumb(image) image.close() orig_url = "http://assets.maybi.cn/%s" % path return orig_url @celery.task def make_thumbnails(space, path, url, async=True): conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID,
def handle(self, *args, **options):
    from io import BytesIO
    import mimetypes
    import boto
    from boto.s3.key import Key

    if hasattr(settings, 'USE_S3_STORAGE') and settings.USE_S3_STORAGE:
        bucket_name = settings.AWS_STORAGE_BUCKET_NAME
        bucket_site_folder_name = settings.AWS_LOCATION
        conn = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        bucket = conn.get_bucket(bucket_name)
        k = Key(bucket)

        static_root = settings.STATIC_ROOT
        static_url_to_find = settings.LOCAL_STATIC_URL
        static_url_replace_with = settings.STATIC_URL

        if os.path.isdir(static_root):
            # walk through the directory
            for dirpath, dirnames, filenames in os.walk(static_root):
                for filename in filenames:
                    # skip the jquery and websymbols.css
                    if filename.find('jquery') == -1 and filename != 'websymbols.css':
                        if os.path.splitext(filename)[1] in ['.js', '.css', '.less']:
                            file_path = (os.path.join(dirpath, filename)).replace('\\', '/')
                            with open(file_path) as f:
                                content = f.read()
                                if content.find(static_url_to_find) != -1:
                                    new_content = content.replace(static_url_to_find,
                                                                  static_url_replace_with)
                                    # upload to s3
                                    key = '%s/%s/%s' % (bucket_site_folder_name,
                                                        dirpath.replace(static_root, 'static'),
                                                        filename)
                                    k.key = key
                                    content_type = mimetypes.guess_type(filename)[0] or k.DefaultContentType
                                    k.set_metadata('Content-Type', content_type)
                                    myfile = BytesIO(new_content)
                                    k.set_contents_from_file(myfile, replace=True)
                                    myfile.close()
                                    #k.set_contents_from_string(new_content, replace=True)
                                    k.set_acl('public-read')
                                    print(file_path)
    else:
        print('Site is not using S3 Storage.')
def store_file(self, name, fp):
    """
    Stores file on AWS S3 from file pointer fp.
    """
    k = Key(self.bucket, name)
    k.set_contents_from_file(fp)
    return self
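# Hedged usage sketch (not from the original source): store_file() above only
# relies on self.bucket being a boto Bucket, so any wrapper object exposing one
# can reuse it; `storage` and the paths below are placeholders.
with open('backups/db.dump', 'rb') as fp:
    storage.store_file('backups/db.dump', fp)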
from boto.s3.connection import S3Connection
from boto.s3.key import Key

sys.path.append(
    os.path.join(os.path.abspath(os.path.dirname(__file__)), os.path.pardir))
import settings

conn = S3Connection()
bucket = conn.get_bucket(settings.S3_BUCKET)

base = sys.argv[1]
for j in os.listdir(base):
    print "uploading", j
    k = Key(bucket)
    k.key = os.path.join(sys.argv[1], j)
    gzdata = cStringIO.StringIO()
    gzfile = gzip.GzipFile(fileobj=gzdata, mode="wb")
    gzfile.write(open(os.path.join(base, j)).read())
    gzfile.close()
    gzdata.seek(0)
    k.set_metadata('Content-Type', 'application/json')
    k.set_metadata('Content-Encoding', 'gzip')
    k.set_contents_from_file(gzdata)
    k.set_acl('public-read')
def copy_data(department_source, department_destination, person_source=None, person_destination=None, qualifier_source=None, qualifier_destination=None): """Copies the latest storage unit in particular order. If all parameters are specified then the latest storage unit from department_source, matching the person_source and qualifier_source is copied. If qualifier_source and qualifier_destination are not specified then all of the latest storage units from department_source, matching the person_source are copied. If person_source and person_destnation are not specified then all of the latest storage unit from the department_source, matching the qualifier_source are copied. If only department_source and department_destination are specified then all of the latest storage units from the department_source are copied. Arguments: department_source(str): specifies the department to which the storage unit belongs. department_destination(str): specifies the department to which the storage unit belongs. person_source(str, optional): specifies the person to which the storage unit belongs. Defaults to None. person_destination(str, optional): specifies the person to which storage unit belongs. Defaults to None. qualifier_source(str, optional): specifies the qualifier to which storage unit belongs. Defaults to None. qualifier_destination(str, optional): specifies the qualifier to which storage unit belongs. Defaults to None. Returns: keys corresponding to storage unit which are copied. Exceptions: DepartmentValueError: raised when department is an empty string. PersonValueError: raised when person is an empty string. QualifierValueError: raised when qualifier is an empty string. BucketValueError: raised when bucket is an empty string or specified bucket does not exist. S3ValueError: raised when S3 credentials are incorrect. SocketValueError: raised when host parameter is incorrect. 
""" if not department_source: raise DepartmentValueError("DepartmentValueError: department_source" " cannot be empty string.") if not department_destination: raise DepartmentValueError("DepartmentValueError: " "department_destination cannot be empty" " string.") if person_source is not None and person_destination is not None: if not person_source: raise PersonValueError("PersonValueError: person_source cannot" " be empty string.") if not person_destination: raise PersonValueError("PersonValueError: person_destination" " cannot be empty string.") if person_source is not None and person_destination is None: if not person_source: raise PersonValueError("PersonValueError: person_source cannot" " be empty string.") if person_destination is not None and person_source is None: if not person_destination: raise PersonValueError("PersonValueError: person_destination " "cannot be empty string.") if (person_source and not person_destination or person_destination and not person_source): raise PersonValueError("PersonValueError: If provided then both " "person_source & person_destination " "should be provided.") if qualifier_source is not None and qualifier_destination is not None: if not qualifier_source: raise QualifierValueError("QualifierValueError: qualifier_source" " cannot be empty string.") if not qualifier_destination: raise QualifierValueError("QualifierValueError: qualifier" "destination cannot be empty string.") if qualifier_source is not None and qualifier_destination is None: if not qualifier_source: raise QualifierValueError("QualifierValueError: qualifier_source" " cannot be empty string.") if qualifier_destination is not None and qualifier_source is None: if not qualifier_destination: raise QualifierValueError("QualifierValueError: qualifier_" "destination cannot be empty string.") if (qualifier_source and not qualifier_destination or qualifier_destination and not qualifier_source): raise QualifierValueError("QualifierValueError: If provided then both" " qualifier_source & qualifier_destination" " should be provided.") keys = [] user = __get_value('user') passwd = __get_value('passwd') db_name = __get_value('db_name') tbl_name = __get_value('tbl_name') con = mdb.connect('localhost', user, passwd, db_name) cur = con.cursor() if (department_source and department_destination and person_source and person_destination and qualifier_source and qualifier_destination): dep_s = department_source per_s = person_source qual_s = qualifier_source query = "SELECT complete_path from %s where department = '%s'and \ person = '%s' and qualifier = '%s'" % (tbl_name, dep_s, per_s, qual_s) cur.execute(query) result = cur.fetchall() if not result: print "Record Not Found" else: source_path = [res[0] for res in result] dep_d = department_destination per_d = person_destination qual_d = qualifier_destination destination_path = ''.join([dep_d, "/", per_d, "/", qual_d]) hash_value_destination = __create_hash(destination_path) if FILE_STORE_TYPE == 'Unix': STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) if not os.path.exists(str(hash_value_destination)): os.makedirs(str(hash_value_destination)) shutil.copy(source_path[0], str(hash_value_destination)) os.chdir(str(hash_value_destination)) filename = os.listdir(os.getcwd())[0] full_path = ''.join( [str(hash_value_destination), "/", filename]) key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, per_d, qual_d, full_path, key) elif FILE_STORE_TYPE == 'S3': if not AWS_ACCESS_KEY_ID and not AWS_ACCESS_KEY_SECRET: raise 
S3ValueError("S3ValueError: Credentials cannot be" " empty string.") if not AWS_ACCESS_KEY_ID: raise S3ValueError("S3ValueError: Access Key Id cannot" " be empty string.") if not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Access Key Secret" " cannot be empty string") if not host: raise SocketValueError("SocketValueError: Host value" " cannot be empty string.") if not bucket_name: raise BucketValueError("BucketValueError: The bucket name" " cannot be empty string.") s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError: Access denied as" " the credentials are incorrect.") if e.code == 'NoSuchBucket': raise BucketValueError("BucketValueError:No Such" " Bucket exists.") except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check the" " value of host.") old_key = bucket.get_key(source_path[0]) old_key_name = source_path[0].split("/")[-1] ftemp = tempfile.NamedTemporaryFile(delete=True) old_key.get_contents_to_filename(ftemp.name) full_path = ''.join( [str(hash_value_destination), "/", old_key_name]) fp = open(ftemp.name, 'rb') new_key = Key(bucket) new_key.key = full_path new_key.set_contents_from_file(fp) fp.close() key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, per_d, qual_d, full_path, key) else: print("Invalid Value for FILE STORE TYPE") elif (department_source and department_destination and qualifier_source and qualifier_destination): path_s = [] person_s = [] dep_s = department_source qual_s = qualifier_source query = "SELECT person, complete_path FROM %s WHERE department = '%s' \ and qualifier = '%s'" % (tbl_name, dep_s, qual_s) cur.execute(query) result = cur.fetchall() if not result: print "Record Not Found" else: for re in result: person_s.append(re[0]) path_s.append(re[1]) dep_d = department_destination qual_d = qualifier_destination for index in range(len(path_s)): destination_path = ''.join( [str(dep_d), "/", str(person_s[index]), "/", str(qual_d)]) hash_value_destination = __create_hash(destination_path) if FILE_STORE_TYPE == 'Unix': STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) if not os.path.exists(str(hash_value_destination)): os.makedirs(str(hash_value_destination)) shutil.copy(path_s[index], str(hash_value_destination)) os.chdir(str(hash_value_destination)) filename = os.listdir(os.getcwd())[0] full_path = ''.join( [str(hash_value_destination), "/", filename]) key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, person_s[index], qual_d, full_path, key) elif FILE_STORE_TYPE == 'S3': if not AWS_ACCESS_KEY_ID and not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Credentials cannot" " be empty string.") if not AWS_ACCESS_KEY_ID: raise S3ValueError("S3ValueError: Access Key Id" " cannot be empty string.") if not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Access Key Secret" " cannot be empty string") if not host: raise SocketValueError("SocketValueError: Host value" " cannot be empty string.") if not bucket_name: raise BucketValueError("BucketValueError: The bucket " "name cannot be empty string.") s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError:Access denied as" " credentials are" " incorrect.") if e.code == 'NoSuchBucket': 
raise BucketValueError("BucketValueError: No Such" " Bucket exists.") except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check" " the value of host.") old_key = bucket.get_key(path_s[index]) old_key_name = path_s[index].split("/")[-1] ftemp = tempfile.NamedTemporaryFile(delete=True) old_key.get_contents_to_filename(ftemp.name) full_path = ''.join( [str(hash_value_destination), "/", old_key_name]) fp = open(ftemp.name, 'rb') new_key = Key(bucket) new_key.key = full_path new_key.set_contents_from_file(fp) fp.close() key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, person_s[index], qual_d, full_path, key) else: print("Invalid Value for FILE STORE TYPE") elif (department_source and department_destination and person_source and person_destination): path_s = [] qualifier_s = [] dep_s = department_source per_s = person_source query = "SELECT qualifier, complete_path from %s where \ department = '%s' and person = '%s'" % (tbl_name, dep_s, per_s) cur.execute(query) result = cur.fetchall() if not result: print "Result Not Found" else: for re in result: qualifier_s.append(re[0]) path_s.append(re[1]) dep_d = department_destination per_d = person_destination for index in range(len(path_s)): destination_path = ''.join([ str(dep_d), "/", str(per_d), "/", str(qualifier_s[index]) ]) hash_value_destination = __create_hash(destination_path) if FILE_STORE_TYPE == 'Unix': STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) if not os.path.exists(str(hash_value_destination)): os.makedirs(str(hash_value_destination)) shutil.copy(path_s[index], str(hash_value_destination)) os.chdir(str(hash_value_destination)) filename = os.listdir(os.getcwd())[0] full_path = ''.join( [str(hash_value_destination), "/", filename]) key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, per_d, qualifier_s[index], full_path, key) elif FILE_STORE_TYPE == 'S3': if not AWS_ACCESS_KEY_ID and not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Credentials cannot" " be empty string.") if not AWS_ACCESS_KEY_ID: raise S3ValueError("S3ValueError: Access Key Id cannot" " be empty string.") if not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Access Key Secret" " cannot be empty string") if not host: raise SocketValueError("SocketValueError: Host value" " cannot be empty string.") if not bucket_name: raise BucketValueError("BucketValueError: The bucket " "name cannot be empty string.") s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError: Access denied" " as the credentials are" " incorrect.") if e.code == 'NoSuchBucket': raise BucketValueError("BucketValueError: No Such" " Bucket exists.") except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check" " the value of host.") old_key = bucket.get_key(path_s[index]) old_key_name = path_s[index].split("/")[-1] ftemp = tempfile.NamedTemporaryFile(delete=True) old_key.get_contents_to_filename(ftemp.name) full_path = ''.join( [str(hash_value_destination), "/", old_key_name]) fp = open(ftemp.name, 'rb') new_key = Key(bucket) new_key.key = full_path new_key.set_contents_from_file(fp) fp.close() key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, per_d, qualifier_s[index], full_path, key) else: print("Invalid Value for FILE STORE TYPE") elif (department_source and department_destination): path_s = [] 
person_s = [] qualifier_s = [] dep_s = department_source query = "SELECT person, qualifier, complete_path from %s \ WHERE department = '%s'" % (tbl_name, dep_s) cur.execute(query) result = cur.fetchall() if not result: print "Result Not Found" else: for re in result: person_s.append(re[0]) qualifier_s.append(re[1]) path_s.append(re[2]) dep_d = department_destination for index in range(len(path_s)): destination_path = ''.join([ str(dep_d), "/", str(person_s[index]), "/", str(qualifier_s[index]) ]) hash_value_destination = __create_hash(destination_path) if FILE_STORE_TYPE == 'Unix': STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) if not os.path.exists(str(hash_value_destination)): os.makedirs(str(hash_value_destination)) shutil.copy(path_s[index], str(hash_value_destination)) os.chdir(str(hash_value_destination)) filename = os.listdir(os.getcwd())[0] full_path = ''.join( [str(hash_value_destination), "/", filename]) key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, person_s[index], qualifier_s[index], full_path, key) elif FILE_STORE_TYPE == 'S3': if not AWS_ACCESS_KEY_ID and not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Credentials cannot" " be empty string.") if not AWS_ACCESS_KEY_ID: raise S3ValueError("S3ValueError:Access Key Id cannot" " be empty string.") if not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Access Key Secret" " cannot be empty string") if not host: raise SocketValueError("SocketValueError: Host value" " cannot be empty string.") if not bucket_name: raise BucketValueError("BucketValueError: The bucket " "name cannot be empty string.") s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError:Access Denied as" " the credentials are" " incorrect.") if e.code == 'NoSuchBucket': raise BucketValueError("BucketValueError: No Such" " Bucket exists.") except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check" " the value of host.") old_key = bucket.get_key(path_s[index]) old_key_name = path_s[index].split("/")[-1] ftemp = tempfile.NamedTemporaryFile(delete=True) old_key.get_contents_to_filename(ftemp.name) full_path = ''.join( [str(hash_value_destination), "/", old_key_name]) fp = open(ftemp.name, 'rb') new_key = Key(bucket) new_key.key = full_path new_key.set_contents_from_file(fp) fp.close() key = __create_hash(full_path) keys.append(key) __insert_key(dep_d, person_s[index], qualifier_s[index], full_path, key) else: print("Invalid Value for FILE STORE TYPE") con.close() return keys
def saveByteStringToAmazonS3(byteString, fileName, s3bucket):
    k = Key(s3bucket)
    k.key = fileName
    k.set_contents_from_file(BytesIO(byteString))
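# Hedged usage sketch (not from the original source): the helper above takes an
# already-connected boto bucket, so the caller is responsible for the
# connection. The bucket name and key below are placeholders.
import boto

s3bucket = boto.connect_s3().get_bucket('example-bucket')
saveByteStringToAmazonS3(b'hello world', 'greetings/hello.txt', s3bucket)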
def archive(self, s3_bucket):
    with open(self.path, 'r') as fo:
        s3_key = Key(s3_bucket)
        s3_key.key = self.name
        logger.info('uploading %s to %s', fo.name, s3_key.key)
        s3_key.set_contents_from_file(fo)
args.region = 'ap-southeast-2'

if args.version:
    if args.dest:
        path = os.path.dirname(args.dest)
        filename_parts = os.path.splitext(os.path.basename(args.dest))
        path_with_filename = "%s/%s" % (path, filename_parts[0])
        file_parts = (path_with_filename, filename_parts[1])
    else:
        file_parts = os.path.splitext(os.path.basename(args.filename))
    filename = "%s.%s%s" % (file_parts[0], args.version, file_parts[1])
else:
    if args.dest:
        filename = args.dest
    else:
        filename = os.path.basename(args.filename)

conn = boto.s3.connect_to_region(
    args.region,
    calling_format=boto.s3.connection.OrdinaryCallingFormat())
bucket = conn.lookup(args.bucket)
if bucket is None:
    bucket = conn.create_bucket(args.bucket)

k = Key(bucket)
k.key = filename
k.set_contents_from_file(fp)

# https://gist.github.com/nigeldunn/ea245d14eed17ee8c989
        insert = sql_table.insert()
        headers = t.headers()
        rows = [dict(zip(headers, row)) for row in t.to_rows()]
        for row in rows:
            c.execute(str(insert), row)
        conn.commit()
    else:
        print 'Already saved report %s' % report_data['detail_url']

c.execute('select date_filed from reports order by date_filed limit 1')
oldest_year = parser.parse(c.fetchone()[0]).year
c.execute('select date_filed from reports order by date_filed desc limit 1')
newest_year = parser.parse(c.fetchone()[0]).year
c.execute('select * from reports limit 1')
header = list(map(lambda x: x[0], c.description))

for year in range(oldest_year, newest_year + 1):
    oldest_date = '%s-01-01' % year
    newest_date = '%s-12-31' % year
    c.execute('select * from reports where date_filed >= ? and date_filed <= ?',
              (oldest_date, newest_date))
    rows = c.fetchall()
    outp = StringIO()
    writer = UnicodeCSVWriter(outp)
    writer.writerow(header)
    writer.writerows(rows)
    outp.seek(0)
    k.key = 'Reports/%s.csv' % year
    k.set_contents_from_file(outp)
    k.make_public()
def store_data(department, person, qualifier, content): """Store the content as a properly classified date and time stamped retrievable storage unit. It calls search keys method to check if there already exists a key with given arguments and (if yes) overwrites the existing file else a new key is created. Arguments: department(str): specifies the department to which the storage unit belongs. person(str): specifies the person to which the storage unit belongs. qualifier(str): specifies the type of storage unit. content(file): points to the data stream from which the data is retrieved and stored. Returns: int: a logical key which can be used to retrieve data directly without going through a search. Exceptions: DepartmentValueError: raised when department is an empty string. PersonValueError: raised when person is an empty string. QualifierValueError: raised when qualifier is an empty string. BucketValueError: raised when bucket is an empty string or specified bucket does not exist. S3ValueError: raised when S3 credentials are incorrect. SocketValueError: raised when host parameter is incorrect. """ log.info("---inside store_data in storage.py----------") log.info(boto.Version) if not department: raise DepartmentValueError("DepartmentValueError: department cannot" " be empty string.") if not person: raise PersonValueError("PersonValueError: person cannot be empty" " string.") if not qualifier: raise QualifierValueError("QualifierValueError: qualifier cannot be" " empty string.") FILE_STORE_TYPE = __get_value('FILE_STORE_TYPE') flag = 0 if FILE_STORE_TYPE == 'Unix': key_from_table = search_keys(department, person, qualifier) flag = 0 if not key_from_table: path = ''.join([department, "/", person, "/", qualifier]) hash_value = __create_hash(path) STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) if not os.path.exists(str(hash_value)): os.makedirs(str(hash_value)) os.chdir(str(hash_value)) temp = content.name if '/' in temp: parts = temp.split('/') flag = 1 if (flag == 1): filename = parts[-1] else: filename = temp text = content.readlines() with open(filename, 'wb') as f: for line in text: f.write(line) full_path = ''.join([str(hash_value), "/", filename]) key = __create_hash(full_path) __insert_key(department, person, qualifier, full_path, key) return key else: user = __get_value('user') passwd = __get_value('passwd') db_name = __get_value('db_name') tbl_name = __get_value('tbl_name') con = mdb.connect('localhost', user, passwd, db_name) cur = con.cursor() query = "SELECT complete_path FROM %s \ WHERE hash_key = %s" % (tbl_name, key_from_table[0]) cur.execute(query) result = cur.fetchall() path_from_table = [res[0] for res in result] STORAGE_ROOT = __get_value('STORAGE_ROOT') os.chdir(STORAGE_ROOT) text = content.readlines() with open(path_from_table[0], 'wb') as f: for line in text: f.write(line) con.close() return key_from_table elif FILE_STORE_TYPE == "S3": if not AWS_ACCESS_KEY_ID and not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Credentials cannot be empty" " string.") if not AWS_ACCESS_KEY_ID: raise S3ValueError("S3ValueError: Access Key Id cannot be empty" " string.") if not AWS_ACCESS_KEY_SECRET: raise S3ValueError("S3ValueError: Access Key Secret cannot be" " empty string") if not host: raise SocketValueError("SocketValueError: Host value cannot be" " empty string.") if not bucket_name: raise BucketValueError("BucketValueError: Bucket name cannot be" " empty string.") key_from_table = search_keys(department, person, qualifier) if not key_from_table: s3 = 
boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: log.info(bucket_name) log.info(s3.get_all_buckets()) bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError: Access denied as the" " credentials are incorrect.") if e.code == 'NoSuchBucket': raise BucketValueError("BucketValueError: No Such Bucket" " exists.") log.info(e) except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check the value" " of host.") log.info(e) s3_key = Key(bucket) path = ''.join([department, "/", person, "/", qualifier]) hash_value = __create_hash(path) temp = content.name if '/' in temp: parts = temp.split('/') flag = 1 if (flag == 1): filename = parts[-1] else: filename = temp full_path = ''.join([str(hash_value), "/", filename]) s3_key.key = full_path log.info('++++++if part++++++++++') log.info(content) log.info(full_path) log.info(s3_key) s3_key.set_contents_from_file(content) key = __create_hash(full_path) __insert_key(department, person, qualifier, full_path, key) return key else: user = __get_value('user') passwd = __get_value('passwd') db_name = __get_value('db_name') tbl_name = __get_value('tbl_name') con = mdb.connect('localhost', user, passwd, db_name) cur = con.cursor() query = "SELECT complete_path FROM %s WHERE \ hash_key = %s" % (tbl_name, key_from_table[0]) cur.execute(query) result = cur.fetchall() path_from_table = [res[0] for res in result] s3 = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET, host=host) try: bucket = s3.get_bucket(bucket_name) except boto.exception.S3ResponseError as e: if e.code == 'AccessDenied': raise S3ValueError("S3ValueError: Access denied as the" " credentials are incorrect.") if e.code == 'NoSuchBucket': raise BucketValueError("BucketValueError: No Such Bucket" " exists.") except Exception as e: if e.errno == -2: raise SocketValueError("SocketValueError: Check the value" " of host.") s3_key = bucket.get_key(path_from_table[0]) log.info('++++++++++++++++') log.info(content) log.info(s3_key) s3_key.set_contents_from_file(content) con.close() return key_from_table else: print("Invalid Value for FILE STORE TYPE")
def upload_file(local_path, remote_path):
    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = Bucket(conn, BUCKET_NAME)
    key = Key(bucket, remote_path)
    key.set_contents_from_file(file(local_path))
    key.set_acl('public-read')
def snail_mail_bulk_pdf_task(pdf_name, get, **kwargs): """Save a PDF file for all open snail mail tasks""" # pylint: disable=too-many-locals # pylint: disable=unused-argument # pylint: disable=too-many-statements cover_info = [] bulk_merger = PdfFileMerger(strict=False) snails = SnailMailTaskFilterSet( get, queryset=SnailMailTask.objects.filter(resolved=False).order_by( '-amount', 'communication__foia__agency', ).preload_pdf(), ).qs[:100] blank_pdf = FPDF() blank_pdf.add_page() blank = StringIO(blank_pdf.output(dest='S')) for snail in snails.iterator(): # generate the pdf and merge all pdf attachments pdf = SnailMailPDF(snail.communication, snail.category, snail.amount) pdf.generate() single_merger = PdfFileMerger(strict=False) single_merger.append(StringIO(pdf.output(dest='S'))) files = [] for file_ in snail.communication.files.all(): if file_.get_extension() == 'pdf': try: pages = PdfFileReader(file_.ffile).getNumPages() single_merger.append(file_.ffile) files.append((file_, 'attached', pages)) except (PdfReadError, ValueError): files.append((file_, 'error', 0)) else: files.append((file_, 'skipped', 0)) single_pdf = StringIO() try: single_merger.write(single_pdf) except PdfReadError: cover_info.append((snail, None, files)) continue else: cover_info.append((snail, pdf.page, files)) # attach to the mail communication mail, _ = MailCommunication.objects.update_or_create( communication=snail.communication, defaults={ 'to_address': snail.communication.foia.address, 'sent_datetime': timezone.now(), }) single_pdf.seek(0) mail.pdf.save( '{}.pdf'.format(snail.communication.pk), ContentFile(single_pdf.read()), ) # append to the bulk pdf single_pdf.seek(0) bulk_merger.append(single_pdf) # ensure we align for double sided printing if PdfFileReader(single_pdf).getNumPages() % 2 == 1: blank.seek(0) bulk_merger.append(blank) # preprend the cover sheet cover_pdf = CoverPDF(cover_info) cover_pdf.generate() if cover_pdf.page % 2 == 1: cover_pdf.add_page() bulk_merger.merge(0, StringIO(cover_pdf.output(dest='S'))) bulk_pdf = StringIO() bulk_merger.write(bulk_pdf) bulk_pdf.seek(0) conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY) bucket = conn.get_bucket(settings.AWS_STORAGE_BUCKET_NAME) key = Key(bucket) key.key = pdf_name key.set_contents_from_file(bulk_pdf) key.set_canned_acl('public-read')
def get_image(self, size="raw", fmt=None): """ Get the image file in the given size :param size: image size requested (see :attr:`pinhole.common.models.Photo.sizes`) :type size: str :param fmt: image format requested (jpeg, png, gif, etc) :type fmt: str :rtype: file-like object :returns: a file pointer to be read with the image transformed, if the `fp` is closed, the file will be automatically deleted """ if size not in self.sizes: raise ValueError("Size {} not supported".format(size)) if fmt is not None: warnings.warn("File format is not implemented yet") parsed = urlparse(self.s3_path) image_s3_path = self.gen_s3_key(path.basename(parsed.path), size) try: return s3.get_image("s3://%s/%s" % (parsed.hostname, image_s3_path)) except PinholeFileNotFound: pass s3conn = s3.S3Adapter() bucket = s3conn.get_bucket(parsed.hostname) if size == "raw": key = bucket.get_key(parsed.path.lstrip("/")) if key is not None: # we have the image! fpath = s3.get_cache_fpath(self.s3_path) tmp_image = open(fpath, "wb+") key.get_contents_to_file(tmp_image) tmp_image.flush() tmp_image.seek(0) return tmp_image # the image doesn't exist, so let's create it key = bucket.get_key(parsed.path.lstrip("/")) if key is None: raise ValueError("{} - {}".format(self.s3_path, parsed.path)) tmp_image = TemporaryFile(suffix=path.basename(parsed.path)) key.get_contents_to_file(tmp_image) tmp_image.flush() tmp_image.seek(0) img_obj = Image.open(tmp_image) s = self.sizes[size] img_obj.thumbnail((s, s), Image.ANTIALIAS) #upload the image image_s3_path = self.gen_s3_key(path.basename(parsed.path), size) cache_fpath = s3.get_cache_fpath(image_s3_path) print "using ", cache_fpath new_image = open(cache_fpath, "wb+") img_obj.save(new_image, format="JPEG") new_image.flush() new_image.seek(0) key = Key(bucket) key.key = image_s3_path key.set_contents_from_file(new_image) new_image.seek(0) return new_image
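Stripped of the cache lookups, the resize-and-reupload core of get_image follows the pattern sketched below. This is a distilled sketch, assuming an already-open boto bucket and PIL; make_thumbnail_key and its arguments are illustrative names, not part of the original API.

from tempfile import TemporaryFile
from PIL import Image
from boto.s3.key import Key

def make_thumbnail_key(bucket, source_key_name, target_key_name, max_side):
    # Download the original image into a temporary file.
    src = bucket.get_key(source_key_name)
    if src is None:
        raise ValueError("missing source image: %s" % source_key_name)
    tmp = TemporaryFile()
    src.get_contents_to_file(tmp)
    tmp.seek(0)

    # Resize in place, then write the thumbnail back to S3 as a JPEG.
    img = Image.open(tmp)
    img.thumbnail((max_side, max_side), Image.ANTIALIAS)
    out = TemporaryFile()
    img.save(out, format="JPEG")
    out.seek(0)
    key = Key(bucket)
    key.key = target_key_name
    key.set_contents_from_file(out)
    return key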
class S3utils(object): """ S3 Utils A simple user friendly interface to Amazon S3. S3 utils methods are made similar to Linux commands so it is easier to use/remember for simple file operations on S3 buckets. """ def __init__( self, AWS_ACCESS_KEY_ID=getattr(settings, "AWS_ACCESS_KEY_ID", ""), AWS_SECRET_ACCESS_KEY=getattr(settings, "AWS_SECRET_ACCESS_KEY", ""), AWS_STORAGE_BUCKET_NAME=getattr(settings, "AWS_STORAGE_BUCKET_NAME", ""), S3UTILS_DEBUG_LEVEL=getattr(settings, "S3UTILS_DEBUG_LEVEL", 0), AWS_HEADERS=getattr(settings, "AWS_HEADERS", {}), ): """ Parameters ---------- AWS_ACCESS_KEY_ID : string AWS Access key. If it is defined in your Django settings, it will grab it from there. Otherwise you need to specify it here. AWS_SECRET_ACCESS_KEY : string AWS secret. If it is defined in your Django settings, it will grab it from there. Otherwise you need to specify it here. AWS_STORAGE_BUCKET_NAME : string AWS Bucket name. If it is defined in your Django settings, it will grab it from there. Otherwise you need to specify it here. """ self.AWS_ACCESS_KEY_ID = AWS_ACCESS_KEY_ID self.AWS_SECRET_ACCESS_KEY = AWS_SECRET_ACCESS_KEY self.AWS_STORAGE_BUCKET_NAME = AWS_STORAGE_BUCKET_NAME self.S3UTILS_DEBUG_LEVEL = S3UTILS_DEBUG_LEVEL self.AWS_HEADERS = AWS_HEADERS self.conn = None self.conn_cloudfront = None # setting the logging level based on S3UTILS_DEBUG_LEVEL try: if (S3UTILS_DEBUG_LEVEL == 0): logger.setLevel(logging.ERROR) else: logger.setLevel(logging.INFO) except AttributeError: pass def __del__(self): if self.conn: self.disconnect() def printv(self, msg): if self.S3UTILS_DEBUG_LEVEL: print(msg) logger.info(msg) def connect(self): """ Establish the connection. This is done automatically for you. If you lose the connection, you can manually run this to be re-connected. """ self.conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY, debug=self.S3UTILS_DEBUG_LEVEL) self.bucket = self.conn.get_bucket(self.AWS_STORAGE_BUCKET_NAME) self.k = Key(self.bucket) def disconnect(self): """ Close the connection. This is normally done automatically when the garbage collector is deleting s3utils object. """ self.bucket.connection.connection.close() self.conn = None def connect_cloudfront(self): "Connect to Cloud Front. This is done automatically for you when needed." self.conn_cloudfront = connect_cloudfront(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY, debug=self.S3UTILS_DEBUG_LEVEL) @connectit def mkdir(self, target_folder): """ Create a folder on S3. Examples -------- >>> s3utils.mkdir("path/to/my_folder") Making directory: path/to/my_folder """ self.printv("Making directory: %s" % target_folder) self.k.key = re.sub(r"^/|/$", "", target_folder) + "/" self.k.set_contents_from_string('') self.k.close() @connectit def rm(self, path): """ Delete the path and anything under the path. Example ------- >>> s3utils.rm("path/to/file_or_folder") """ list_of_files = list(self.ls(path)) if list_of_files: if len(list_of_files) == 1: self.bucket.delete_key(list_of_files[0]) else: self.bucket.delete_keys(list_of_files) self.printv("Deleted: %s" % list_of_files) else: logger.error("There was nothing to remove under %s", path) @connectit def __put_key(self, local_file, target_file, acl='public-read', del_after_upload=False, overwrite=True, source="filename"): """Copy a file to s3.""" action_word = "moving" if del_after_upload else "copying" try: self.k.key = target_file # setting the path (key) of file in the container if source == "filename": # grabs the contents from local_file address. 
Note that it loads the whole file into memory self.k.set_contents_from_filename(local_file, self.AWS_HEADERS) elif source == "fileobj": self.k.set_contents_from_file(local_file, self.AWS_HEADERS) elif source == "string": self.k.set_contents_from_string(local_file, self.AWS_HEADERS) else: raise Exception("%s is not implemented as a source." % source) self.k.set_acl(acl) # setting the file permissions self.k.close() # not sure if it is needed. Somewhere I read it is recommended. try: self.printv( "%s %s to %s" % (action_word, local_file, target_file)) except UnicodeDecodeError: self.printv( "%s %s to %s" % ( action_word, local_file.decode('utf-8', 'ignore'), target_file ) ) # if it is supposed to delete the local file after uploading if del_after_upload and source == "filename": try: os.remove(local_file) except: logger.error("Unable to delete the file: %s", local_file, exc_info=True) return True except: logger.error("Error in writing to %s", target_file, exc_info=True) return False def cp_from_url( self, source_url, target_path, filename=None, acl='public-read', overwrite=True, invalidate=False ): """ Uploads a file from a specific URL to S3. Parameters ---------- source_url : string Url of the file to be uploaded. target_path : string Target path on S3 bucket. filename : string, optional custom file name acl : string, optional File permissions on S3. Default is public-read options: - private: Owner gets FULL_CONTROL. No one else has any access rights. - public-read: Owner gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access. overwrite : boolean, optional overwrites files on S3 if set to True. Default is True invalidate : boolean, optional invalidates the CDN (a.k.a Distribution) cache if the file already exists on S3. default = False Note that invalidation might take up to 15 minutes to take place. It is easier and faster to use a cache buster to grab the latest version of your file on CDN than invalidation. **Returns** Nothing on success but it will return what went wrong if something fails. Example -------- >>> s3utils.cp_from_url( "http://www.mysite/static/images/pic1.jpg", "/images/") copying <StringIO.StringIO instance at 0x7f36238872d8> to images/pic1.jpg """ result = None if overwrite: list_of_files = [] else: list_of_files = self.ls( folder=target_path, begin_from_file="", num=-1, get_grants=False, all_grant_data=False ) if filename: key = filename else: try: key = source_url.split('/')[-1] except IndexError: result = {'impossible_to_extract_file_name': source_url} logger.error( "it was not possible to extract the file name from: %s", source_url ) return result target_file = re.sub(r'^/', '', os.path.join(target_path, key)) try: file_object = urllib2.urlopen(source_url) except urllib2.HTTPError: result = {'url_not_found': source_url} logger.error( "file url not found: %s", source_url ) return result remote_file = cStringIO.StringIO(file_object.read()) if overwrite or (not overwrite and target_file not in list_of_files): success = self.__put_key( remote_file, target_file=target_file, acl=acl, overwrite=overwrite, source='fileobj' ) if not success: result = {'failed_to_copy_file': target_file} else: result = {'file_already_exits': target_file} logger.error("%s already exist. 
Not overwriting.", target_file) if overwrite and target_file in list_of_files and invalidate: self.invalidate(target_file) return result def cp_from_string( self, stringio_obj, name, target_path, acl='public-read', overwrite=True, invalidate=False ): """ Uploads a file loaded into a StringIO object to S3. Parameters ---------- stringio_obj : StringIO object StringIO object containing the file. name : string name to be used when saving the StringIO object target_path : string Target path on S3 bucket. acl : string, optional File permissions on S3. Default is public-read options: - private: Owner gets FULL_CONTROL. No one else has any access rights. - public-read: Owner gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access overwrite : boolean, optional overwrites files on S3 if set to True. Default is True invalidate : boolean, optional invalidates the CDN (a.k.a Distribution) cache if the file already exists on S3 default = False Note that invalidation might take up to 15 minutes to take place. It is easier and faster to use a cache buster to grab the latest version of your file on CDN than invalidation. **Returns** Nothing on success but it will return what went wrong if something fails. Example -------- >>> import cStringIO >>> from PIL import Image >>> original_img = Image.open(...) >>> im = original_img.crop((left, upper, right, lower)) >>> im = im.resize((width, height), Image.ANTIALIAS) >>> img_string = cStringIO.StringIO() >>> im.save(img_string, "JPEG") >>> s3utils = S3utils() >>> s3utils.cp_from_string( img_string, 'my_crop.jpg', '/img/crops/') """ result = None if overwrite: list_of_files = [] else: list_of_files = self.ls( folder=target_path, begin_from_file="", num=-1, get_grants=False, all_grant_data=False ) target_file = re.sub(r'^/', '', os.path.join(target_path, name)) if overwrite or (not overwrite and target_file not in list_of_files): success = self.__put_key( stringio_obj.getvalue(), target_file=target_file, acl=acl, overwrite=overwrite, source='string' ) if not success: result = {'failed_to_copy_file': target_file} else: result = {'file_already_exits': target_file} logger.error("%s already exist. Not overwriting.", target_file) if overwrite and target_file in list_of_files and invalidate: self.invalidate(target_file) return result def cp(self, local_path, target_path, acl='public-read', del_after_upload=False, overwrite=True, invalidate=False): """ Copy a file or folder from local to s3. Parameters ---------- local_path : string Path to file or folder. If you want to copy only the contents of the folder, add /* to the end of the folder name target_path : string Target path on S3 bucket. acl : string, optional File permissions on S3. Default is public-read options: - private: Owner gets FULL_CONTROL. No one else has any access rights. - public-read: Owner gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access del_after_upload : boolean, optional delete the local file after uploading. This is effectively like moving the file. 
You can use s3utils.mv instead of s3utils.cp to move files from local to S3. It basically sets this flag to True. default = False overwrite : boolean, optional overwrites files on S3 if set to True. Default is True invalidate : boolean, optional invalidates the CDN (a.k.a Distribution) cache if the file already exists on S3 default = False Note that invalidation might take up to 15 minutes to take place. It is easier and faster to use cache buster to grab lastest version of your file on CDN than invalidation. **Returns** Nothing on success but it will return what went wrong if something fails. Examples -------- >>> s3utils.cp("path/to/folder","/test/") copying /path/to/myfolder/test2.txt to test/myfolder/test2.txt copying /path/to/myfolder/test.txt to test/myfolder/test.txt copying /path/to/myfolder/hoho/photo.JPG to test/myfolder/hoho/photo.JPG copying /path/to/myfolder/hoho/haha/ff to test/myfolder/hoho/haha/ff >>> # When overwrite is set to False, it returns the file(s) that were already existing on s3 and were not overwritten. >>> s3utils.cp("/tmp/test3.txt", "test3.txt", overwrite=False) ERROR:root:test3.txt already exist. Not overwriting. >>> {'existing_files': {'test3.txt'}} >>> # To overwrite the files on S3 and invalidate the CDN (cloudfront) cache so the new file goes on CDN: >>> s3utils.cp("path/to/folder","/test/", invalidate=True) copying /path/to/myfolder/test2.txt to test/myfolder/test2.txt copying /path/to/myfolder/test.txt to test/myfolder/test.txt copying /path/to/myfolder/hoho/photo.JPG to test/myfolder/hoho/photo.JPG copying /path/to/myfolder/hoho/haha/ff to test/myfolder/hoho/haha/ff >>> # When file does not exist, it returns a dictionary of what went wrong. >>> s3utils.cp("/tmp/does_not_exist", "somewhere") ERROR:root:trying to upload to s3 but file doesn't exist: /tmp/does_not_exist >>> {'file_does_not_exist': '/tmp/does_not_exist'} """ result = None if overwrite: list_of_files = [] else: list_of_files = self.ls(folder=target_path, begin_from_file="", num=-1, get_grants=False, all_grant_data=False) # copying the contents of the folder and not folder itself if local_path.endswith("/*"): local_path = local_path[:-2] target_path = re.sub(r"^/|/$", "", target_path) # Amazon S3 doesn't let the name to begin with / # copying folder too else: local_base_name = os.path.basename(local_path) local_path = re.sub(r"/$", "", local_path) target_path = re.sub(r"^/", "", target_path) if not target_path.endswith(local_base_name): target_path = os.path.join(target_path, local_base_name) if os.path.exists(local_path): result = self.__find_files_and_copy(local_path, target_path, acl, del_after_upload, overwrite, invalidate, list_of_files) else: result = {'file_does_not_exist': local_path} logger.error("trying to upload to s3 but file doesn't exist: %s" % local_path) return result def __find_files_and_copy(self, local_path, target_path, acl='public-read', del_after_upload=False, overwrite=True, invalidate=False, list_of_files=[]): files_to_be_invalidated = [] failed_to_copy_files = set([]) existing_files = set([]) def check_for_overwrite_then_write(): if overwrite or (not overwrite and target_file not in list_of_files): success = self.__put_key( local_file, target_file=target_file, acl=acl, del_after_upload=del_after_upload, overwrite=overwrite, ) if not success: failed_to_copy_files.add(target_file) else: existing_files.add(target_file) logger.error("%s already exist. 
Not overwriting.", target_file) if overwrite and target_file in list_of_files and invalidate: files_to_be_invalidated.append(target_file) first_local_root = None # if it is a folder if os.path.isdir(local_path): for local_root, directories, files in os.walk(local_path): if not first_local_root: first_local_root = local_root # if folder is not empty if files: # iterating over the files in the folder for a_file in files: local_file = os.path.join(local_root, a_file) target_file = os.path.join( target_path + local_root.replace(first_local_root, ""), a_file ) check_for_overwrite_then_write() # if folder is empty else: target_file = target_path + local_root.replace(first_local_root, "") + "/" if target_file not in list_of_files: self.mkdir(target_file) if del_after_upload: rmtree(local_path) # if it is a file else: local_file = local_path target_file = target_path check_for_overwrite_then_write() if invalidate and files_to_be_invalidated: self.invalidate(files_to_be_invalidated) items = ('failed_to_copy_files', 'existing_files') local_vars = locals() result = {} for i in items: val = local_vars.get(i) if val: result[i] = val result = None if result == {} else result return result def echo(self, content, target_path, acl='public-read', overwrite=True, invalidate=False): """ Similar to Linux Echo command. Puts the string into the target path on s3 Parameters ---------- content : string The content to be put on the s3 bucket. target_path : string Target path on S3 bucket. acl : string, optional File permissions on S3. Default is public-read options: - private: Owner gets FULL_CONTROL. No one else has any access rights. - public-read: (Default) Owners gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access overwrite : boolean, optional overwrites files on S3 if set to True. Default is True invalidate : boolean, optional invalidates the CDN (a.k.a Distribution) cache if the file already exists on S3 default = False Note that invalidation might take up to 15 minutes to take place. It is easier and faster to use cache buster to serve the lastest version of your file on CDN than invalidation. **Returns:** Nothing on success, otherwise it returns what went wrong. Return type: dict Examples -------- >>> # On success returns nothing: >>> s3utils.echo("Hello World!","/test.txt") >>> # On failure returns what went wrong >>> s3utils.echo("Hello World!","/test/") {'InvalidS3Path': "path on S3 can not end in /"} """ result = None if target_path.endswith('/') or target_path.endswith('*'): result = {'InvalidS3Path': "Path on S3 can not end in /"} if not overwrite and not result: file_exists = self.ls(target_path) if file_exists: logger.error("%s already exist. Not overwriting.", target_path) result = {'existing_files': target_path} if content and not result: if isinstance(content, strings): result = self.__put_key(content, target_path, acl=acl, del_after_upload=False, overwrite=overwrite, source="string") else: result = {"TypeError": "Content is not string"} return result def mv(self, local_file, target_file, acl='public-read', overwrite=True, invalidate=False): """ Similar to Linux mv command. 
Move the file to the S3 and deletes the local copy It is basically s3utils.cp that has del_after_upload=True Examples -------- >>> s3utils.mv("path/to/folder","/test/") moving /path/to/myfolder/test2.txt to test/myfolder/test2.txt moving /path/to/myfolder/test.txt to test/myfolder/test.txt moving /path/to/myfolder/hoho/photo.JPG to test/myfolder/hoho/photo.JPG moving /path/to/myfolder/hoho/haha/ff to test/myfolder/hoho/haha/ff **Returns:** Nothing on success, otherwise what went wrong. Return type: dict """ self.cp(local_file, target_file, acl=acl, del_after_upload=True, overwrite=overwrite, invalidate=invalidate) @connectit def cp_cropduster_image(self, the_image_path, del_after_upload=False, overwrite=False, invalidate=False): """ Deal with saving cropduster images to S3. Cropduster is a Django library for resizing editorial images. S3utils was originally written to put cropduster images on S3 bucket. Extra Items in your Django Settings ----------------------------------- MEDIA_ROOT : string Django media root. Currently it is ONLY used in cp_cropduster_image method. NOT any other method as this library was originally made to put Django cropduster images on s3 bucket. S3_ROOT_BASE : string S3 media root base. This will be the root folder in S3. Currently it is ONLY used in cp_cropduster_image method. NOT any other method as this library was originally made to put Django cropduster images on s3 bucket. """ local_file = os.path.join(settings.MEDIA_ROOT, the_image_path) # only try to upload things if the origin cropduster file exists (so it is not already uploaded to the CDN) if os.path.exists(local_file): the_image_crops_path = os.path.splitext(the_image_path)[0] the_image_crops_path_full_path = os.path.join(settings.MEDIA_ROOT, the_image_crops_path) self.cp(local_path=local_file, target_path=os.path.join(settings.S3_ROOT_BASE, the_image_path), del_after_upload=del_after_upload, overwrite=overwrite, invalidate=invalidate, ) self.cp(local_path=the_image_crops_path_full_path + "/*", target_path=os.path.join(settings.S3_ROOT_BASE, the_image_crops_path), del_after_upload=del_after_upload, overwrite=overwrite, invalidate=invalidate, ) def __get_grants(self, target_file, all_grant_data): """ Return grant permission, grant owner, grant owner email and grant id as a list. It needs you to set k.key to a key on amazon (file path) before running this. note that Amazon returns a list of grants for each file. options: - private: Owner gets FULL_CONTROL. No one else has any access rights. - public-read: Owners gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access """ self.k.key = target_file the_grants = self.k.get_acl().acl.grants grant_list = [] for grant in the_grants: if all_grant_data: grant_list.append( {"permission": grant.permission, "name": grant.display_name, "email": grant.email_address, "id": grant.id}) else: grant_list.append({"permission": grant.permission, "name": grant.display_name}) return grant_list @connectit def chmod(self, target_file, acl='public-read'): """ sets permissions for a file on S3 Parameters ---------- target_file : string Path to file on S3 acl : string, optional File permissions on S3. Default is public-read options: - private: Owner gets FULL_CONTROL. No one else has any access rights. 
- public-read: Owners gets FULL_CONTROL and the anonymous principal is granted READ access. - public-read-write: Owner gets FULL_CONTROL and the anonymous principal is granted READ and WRITE access. - authenticated-read: Owner gets FULL_CONTROL and any principal authenticated as a registered Amazon S3 user is granted READ access Examples -------- >>> s3utils.chmod("path/to/file","private") """ self.k.key = target_file # setting the path (key) of file in the container self.k.set_acl(acl) # setting the file permissions self.k.close() @connectit def ls(self, folder="", begin_from_file="", num=-1, get_grants=False, all_grant_data=False): """ gets the list of file names (keys) in a s3 folder Parameters ---------- folder : string Path to file on S3 num: integer, optional number of results to return, by default it returns all results. begin_from_file: string, optional which file to start from on S3. This is usedful in case you are iterating over lists of files and you need to page the result by starting listing from a certain file and fetching certain num (number) of files. Examples -------- >>> from s3utils import S3utils >>> s3utils = S3utils( ... AWS_ACCESS_KEY_ID = 'your access key', ... AWS_SECRET_ACCESS_KEY = 'your secret key', ... AWS_STORAGE_BUCKET_NAME = 'your bucket name', ... S3UTILS_DEBUG_LEVEL = 1, #change it to 0 for less verbose ... ) >>> print(s3utils.ls("test/")) {u'test/myfolder/', u'test/myfolder/em/', u'test/myfolder/hoho/', u'test/myfolder/hoho/.DS_Store', u'test/myfolder/hoho/haha/', u'test/myfolder/hoho/haha/ff', u'test/myfolder/hoho/haha/photo.JPG'} """ # S3 object key can't start with / folder = re.sub(r"^/", "", folder) bucket_files = self.bucket.list(prefix=folder, marker=begin_from_file) # in case listing grants if get_grants: list_of_files = OrderedDict() for (i, v) in enumerate(bucket_files): file_info = {v.name: self.__get_grants(v.name, all_grant_data)} list_of_files.update(file_info) if i == num: break else: list_of_files = set([]) for (i, v) in enumerate(bucket_files): list_of_files.add(v.name) if i == num: break return list_of_files def ll(self, folder="", begin_from_file="", num=-1, all_grant_data=False): """ Get the list of files and permissions from S3. This is similar to LL (ls -lah) in Linux: List of files with permissions. Parameters ---------- folder : string Path to file on S3 num: integer, optional number of results to return, by default it returns all results. begin_from_file : string, optional which file to start from on S3. This is usedful in case you are iterating over lists of files and you need to page the result by starting listing from a certain file and fetching certain num (number) of files. all_grant_data : Boolean, optional More detailed file permission data will be returned. Examples -------- >>> from s3utils import S3utils >>> s3utils = S3utils( ... AWS_ACCESS_KEY_ID = 'your access key', ... AWS_SECRET_ACCESS_KEY = 'your secret key', ... AWS_STORAGE_BUCKET_NAME = 'your bucket name', ... S3UTILS_DEBUG_LEVEL = 1, #change it to 0 for less verbose ... 
) >>> import json >>> # We use json.dumps to print the results more readable: >>> my_folder_stuff = s3utils.ll("/test/") >>> print(json.dumps(my_folder_stuff, indent=2)) { "test/myfolder/": [ { "name": "owner's name", "permission": "FULL_CONTROL" } ], "test/myfolder/em/": [ { "name": "owner's name", "permission": "FULL_CONTROL" } ], "test/myfolder/hoho/": [ { "name": "owner's name", "permission": "FULL_CONTROL" } ], "test/myfolder/hoho/.DS_Store": [ { "name": "owner's name", "permission": "FULL_CONTROL" }, { "name": null, "permission": "READ" } ], "test/myfolder/hoho/haha/": [ { "name": "owner's name", "permission": "FULL_CONTROL" } ], "test/myfolder/hoho/haha/ff": [ { "name": "owner's name", "permission": "FULL_CONTROL" }, { "name": null, "permission": "READ" } ], "test/myfolder/hoho/photo.JPG": [ { "name": "owner's name", "permission": "FULL_CONTROL" }, { "name": null, "permission": "READ" } ], } """ return self.ls(folder=folder, begin_from_file=begin_from_file, num=num, get_grants=True, all_grant_data=all_grant_data) @connectit_cloudfront def invalidate(self, files_to_be_invalidated): """ Invalidate the CDN (distribution) cache for a certain file of files. This might take up to 15 minutes to be effective. You can check for the invalidation status using check_invalidation_request. Examples -------- >>> from s3utils import S3utils >>> s3utils = S3utils( ... AWS_ACCESS_KEY_ID = 'your access key', ... AWS_SECRET_ACCESS_KEY = 'your secret key', ... AWS_STORAGE_BUCKET_NAME = 'your bucket name', ... S3UTILS_DEBUG_LEVEL = 1, #change it to 0 for less verbose ... ) >>> aa = s3utils.invalidate("test/myfolder/hoho/photo.JPG") >>> print(aa) ('your distro id', u'your request id') >>> invalidation_request_id = aa[1] >>> bb = s3utils.check_invalidation_request(*aa) >>> for inval in bb: ... print('Object: %s, ID: %s, Status: %s' % (inval, inval.id, inval.status)) """ if not isinstance(files_to_be_invalidated, Iterable): files_to_be_invalidated = (files_to_be_invalidated,) # Your CDN is called distribution on Amazaon. And you can have more than one distro all_distros = self.conn_cloudfront.get_all_distributions() for distro in all_distros: invalidation_request = self.conn_cloudfront.create_invalidation_request(distro.id, files_to_be_invalidated) return (distro.id, invalidation_request.id) @connectit_cloudfront def check_invalidation_request(self, distro, request_id): return self.conn_cloudfront.get_invalidation_requests(distro, request_id)
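A short, hypothetical usage run for the class above; the credentials are placeholders and the paths are only illustrative.

from s3utils import S3utils

s3utils = S3utils(
    AWS_ACCESS_KEY_ID='your access key',
    AWS_SECRET_ACCESS_KEY='your secret key',
    AWS_STORAGE_BUCKET_NAME='your bucket name',
    S3UTILS_DEBUG_LEVEL=1,
)

# copy a local folder up, then tighten permissions on one uploaded file
s3utils.cp("path/to/folder", "/test/")
s3utils.chmod("test/folder/report.txt", "private")

# write a small text object directly and list what is on S3 now
s3utils.echo("Hello World!", "/test/hello.txt")
print(s3utils.ls("test/"))

# move a local file (copy + delete local), then remove it remotely when done
s3utils.mv("/tmp/generated.csv", "/test/exports/")
s3utils.rm("test/exports/generated.csv")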