def get_most_wanted():
    wanted = requests.get(MOST_WANTED, params={'max': 100})
    if wanted.status_code == 200:
        s3conn = S3Connection(AWS_KEY, AWS_SECRET)
        bucket = s3conn.get_bucket('crime.static-eric.com')
        wanted_list = []
        for person in wanted.json():
            warrant = person['warrantNo']
            wanted_list.append(warrant)
            mugs = requests.get(MUGSHOTS, params={'warrantNo': warrant})
            person['mugs'] = []
            if mugs.status_code == 200:
                for mug in mugs.json():
                    image_path = 'images/wanted/%s_%s.jpg' % (warrant, mug['mugshotNo'])
                    k = Key(bucket)
                    k.key = image_path
                    k.set_contents_from_string(b64decode(mug['image']))
                    k.set_acl('public-read')
                    person['mugs'].append({'angle': mug['mugshotNo'], 'image_path': image_path})
            else:
                raise ClearPathError('ClearPath API returned %s when fetching mugshots for %s: %s' %
                                     (mugs.status_code, warrant, mugs.content[300:]))
            k = Key(bucket)
            k.key = 'data/wanted/%s.json' % warrant
            k.set_contents_from_string(json.dumps(person, indent=4))
            k.set_acl('public-read')
        k = Key(bucket)
        k.key = 'data/wanted/wanted_list.json'
        k = k.copy(k.bucket.name, k.name, {'Content-Type': 'application/json'})
        k.set_acl('public-read')
    else:
        raise ClearPathError('ClearPath API returned %s when getting most wanted list: %s' %
                             (wanted.status_code, wanted.content[300:]))
def upload_to_s3(site, bucket_name):
    s3_connection, bucket = connect_s3(bucket_name)

    # Upload content
    print 'Uploading content...'
    for path, content in site.s3_page_dict().items():
        k = Key(bucket)
        k.key = path
        k.set_contents_from_string(_gzip(content),
                                   {'Content-Type': 'text/html', 'Content-Encoding': 'gzip'})
        k.set_acl('public-read')

    print "Uploading media..."
    for root, dirs, files in os.walk(site.media_path):
        headers = {'Expires': expiry_date()}
        for f in files:
            file_path = os.path.join(root, f)
            file_key = file_path.replace(site.root_path, '')[1:]
            file_data = open(file_path, 'rb').read()
            content_type = mimetypes.guess_type(file_path)[0]
            if content_type:
                headers['Content-Type'] = content_type
            if content_type in GZIP_CONTENT_TYPES:
                headers['Content-Encoding'] = 'gzip'
                file_data = _gzip(file_data)
            asset = Key(bucket)
            asset.key = file_key
            asset.set_contents_from_string(file_data, headers)
            asset.set_acl('public-read')

    print 'Done!'
def delete_from_S3(self, file_name):
    """Delete files originally uploaded to S3 with the given filename.

    This is just based on a pattern from the config file, not an actual log.
    """
    logging.debug("delete_from_S3 : %s" % file_name)
    key = self.settings["AMAZON_KEY"]
    secret = self.settings["AMAZON_SECRET"]
    bucket_name = self.settings["AMAZON_BUCKET"]
    conn = boto.connect_s3(key, secret)
    bucket = conn.get_bucket(bucket_name)
    image_infos = self.settings['IMAGE_INFO']
    for image_info in image_infos:
        image_file_name = "%s%s.%s" % (file_name, image_info[2], image_info[3])
        logging.debug('Deleting %s from Amazon S3 bucket %s' % (image_file_name, bucket_name))
        k = Key(bucket)
        k.key = image_file_name
        bucket.delete_key(k)

    # delete our original
    image_file_name = "%s_o.png" % file_name
    logging.debug('Deleting %s from Amazon S3 bucket %s' % (image_file_name, bucket_name))
    k = Key(bucket)
    k.key = image_file_name
    bucket.delete_key(k)
    return True
def upload2s3():
    conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY)
    now = datetime.now()
    sevendaysbefore = now - timedelta(days=7)
    try:
        print 'creating bucket'
        bucket = conn.create_bucket('mongodbdump')
        print 'get key'
        k = Key(bucket)
        k.key = sevendaysbefore.date().isoformat()
        if k.exists():
            print 'delete key', k.key
            k.delete()
        k.key = now.date().isoformat()
        if k.exists():
            print 'delete key', k.key
            k.delete()
        options = mock.Mock()
        options.concurrency = 20
        options.reduced_redundancy = False
        options.bucket = "mongodbdump"
        options.path = "."
        options.files = [DUMP_FILE]
        upload(options)
    except Exception, e:
        traceback.print_exc()
def _upload_s3(datafiles, job_id, bucket_name='infernyx'):
    rval = []
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name, validate=False)

    for tmp_file_list, _, tablename, columns in datafiles:
        s3_entries = []
        for tmpfile in tmp_file_list:
            with open(tmpfile) as f:
                md5 = compute_md5(f)
            k = Key(bucket)
            k.key = "%s-%s" % (job_id, tmpfile)
            _log(job_id, "->S3 %s/%s" % (bucket_name, k.key))
            k.set_contents_from_filename(tmpfile, md5=md5, replace=True)
            s3_entry = {"url": "s3://%s/%s" % (bucket_name, k.key), "mandatory": True}
            s3_entries.append(s3_entry)

        # upload the manifest
        prefix = tmp_file_list[0].rsplit('.')[0]
        manifest = ujson.dumps({"entries": s3_entries})
        manifest_key = Key(bucket)
        manifest_key.key = "%s.%s.manifest" % (job_id, prefix)
        _log(job_id, "->S3 %s/%s: %s" % (bucket_name, manifest_key.key, manifest))
        manifest_key.set_contents_from_string(manifest)

        # store manifest
        rval.append(DataFile(tmp_file_list, (bucket_name, manifest_key.key), tablename, columns))
    return rval
def upload_to_s3(bucket_name, file_path, logger, prefix=None, access_key=None, secret_key=None, dry_run=False):
    valid = True
    result_string = ""
    s3_conn = _get_s3_connection(access_key, secret_key)
    if s3_conn.lookup(bucket_name):
        s3_key = Key(s3_conn.get_bucket(bucket_name))
        if prefix:
            s3_key.key = os.path.join(prefix, os.path.basename(file_path))
        else:
            s3_key.key = os.path.basename(file_path)

        def percent_cb(complete, total):
            if total != 0:
                percentage = int(complete) * 100 / int(total)
                logger.write(
                    "Uploading to S3: " + str(complete) + " / " + str(total) +
                    " ( " + str(percentage) + "%)",
                    multi_line=False,
                )
            else:
                sys.stdout.write(".")
                sys.stdout.flush()

        if dry_run:
            result_string += "Skipping actual upload to S3 due to dry run.\n"
        else:
            s3_key.set_contents_from_filename(file_path, cb=percent_cb, num_cb=5)
            result_string += "Uploaded package from " + file_path + " to S3 bucket " + bucket_name + "\n"
    else:
        result_string += "Cannot find S3 bucket with name " + bucket_name + "\n"
        valid = False
    return {"valid": valid, "result_string": result_string}
def put_profile_pic(url, profile):
    """
    Takes a url from filepicker and uploads it to our aws s3 account.
    """
    try:
        r = requests.get(url)
        size = r.headers.get('content-length')
        if int(size) > 10000000:  # greater than 10MB #patlsotw
            return False
        filename, headers = urlretrieve("%s/resize?w=600&h=600" % url)
        # store profile sized picture (40x40px)
        resize_filename, headers = urlretrieve("%s/resize?w=40&h=40" % url)
        conn = S3Connection(env['AWS_ACCESS_KEY_ID'], env['AWS_SECRET_ACCESS_KEY'])
        b = conn.get_bucket(env['AWS_BUCK'])
        k = Key(b)
        k.key = md5.new(profile.user.username).hexdigest()
        k.set_contents_from_filename(filename)
        k.set_acl('public-read')
        k = Key(b)
        k.key = md5.new("%sresize" % profile.user.username).hexdigest()
        k.set_contents_from_filename(resize_filename)
        k.set_acl('public-read')
    except:
        return False
    # update user profile
    return "http://s3.amazonaws.com/%s/%s" % (env['AWS_BUCK'], k.key)
def export_job(request, job):
    account = request.user.account
    conn = S3Connection(aws_access_key_id=account.aws_access_key_id,
                        aws_secret_access_key=account.aws_secret_access_key)
    bucket = conn.get_bucket('lx-pilot')
    key = Key(bucket)
    key.key = job.cache_key
    string = gzip.decompress(key.get_contents_as_string())
    result = json.loads(string.decode('utf-8'))
    rows = result.get('rows')
    rows = [rm_dict_row(row) for row in rows]

    output = StringIO()
    writer = csv.writer(output)
    writer.writerows(rows)

    now = timezone.now()
    key_string = ('exports/' + str(now.year) + '/' + str(now.month) + '/' +
                  str(now.day) + '/' + str(uuid.uuid4()))
    export = JobExport(job=job, created_by=request.user, key=key_string)

    key = Key(bucket)
    key.key = export.key
    key.set_metadata('Content-Type', 'text/csv')
    key.set_metadata('Content-Encoding', 'gzip')
    key.set_contents_from_string(gzip.compress(bytes(output.getvalue(), 'utf-8')))
    key.close()

    key = Key(bucket)
    key.key = export.key
    export.save()
    return export
def sync_s3_files(self, changed_files=[], remove_files=[]):
    length = len(changed_files)
    for idx, path in enumerate(changed_files):
        name = path.replace(self.sync_dir + '/', '')
        print 'Sending...', idx + 1, ' of ', length, ' ', path, ' to ', name
        aws_key = Key(self.bucket)
        aws_key.key = name
        try:
            aws_key.set_contents_from_filename(path)
        except:
            # if the user gives up or a firefly falls into a resistor
            # Many an option, maybe delete MD5 and start over for the lazy
            # For the self motivated add some logic to remove changed files in MD5
            print 'Failed, probably remove MD5 in {0} and start over'.format(self.sync_dir)
            exit()

    length = len(remove_files)
    for idx, path in enumerate(remove_files):
        name = path.replace(self.sync_dir + '/', '')
        print 'Removing...', idx + 1, ' of ', length, ' ', name
        aws_key = Key(self.bucket)
        aws_key.key = name
        self.bucket.delete_key(aws_key)
def send(self, name, src, headers):
    '''Upload an object to S3

    params
        name: name for the destination object
        src: generator of object data
        headers: Content-Type, Content-Encoding, Cache-Control'''
    # Create key
    key = Key(self._bucket)
    if self._directory:
        key.key = (self.sep).join((self._directory, name))
    else:
        key.key = name

    # Headers
    for header_name, header_value in headers.items():
        key.set_metadata(header_name, header_value)
    # Note: S3 already sets Etag

    fbuf = StringIO()  # Temporary in-memory virtual file
    if headers.get('Content-Encoding', None) == 'gzip' and self._gzip:
        # Compressed
        zf = GzipFile(name, 'wb', 9, fbuf)
        zf.write(src)
        zf.close()
    else:
        # Plain
        fbuf.write(src)

    # Upload
    key.set_contents_from_file(fbuf, policy='public-read', reduced_redundancy=True)
def snapshot(url, fn, s3bucket=None):
    print "Capture %s" % url
    system("/opt/wkhtmltoimage-amd64 --crop-h 1024 %s '/tmp/%s.%s'" % (url, fn, ext))
    print "- Create thumbnail"
    system("convert -resize 156x156 '/tmp/%s.%s' '/tmp/%s_thumb.%s'" % (fn, ext, fn, ext_thumb))
    print "- Shrink original"
    system("mogrify -resize 1024x1024 '/tmp/%s.%s'" % (fn, ext))
    if not s3bucket:
        return
    print "- Move into s3 bucket"
    k = Key(s3bucket)
    k.key = "%s.%s" % (fn, ext)
    k.set_contents_from_filename("/tmp/%s.%s" % (fn, ext))
    k.set_acl('public-read')
    k = Key(s3bucket)
    k.key = "%s_thumb.%s" % (fn, ext_thumb)
    k.set_contents_from_filename("/tmp/%s_thumb.%s" % (fn, ext_thumb))
    k.set_acl('public-read')
    system("rm /tmp/%s.%s" % (fn, ext))
    system("rm /tmp/%s_thumb.%s" % (fn, ext_thumb))
def _get_data_files(self):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    s2f = self._s3_to_fname
    while not op.exists(op.join(self.working_dir, s2f(self.meta_file))):
        try:
            conn = boto.connect_s3()
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.meta_file
            k.get_contents_to_filename(op.join(self.working_dir, s2f(self.meta_file)))
        except:
            time.sleep(random.random())

    while not op.exists(op.join(self.working_dir, s2f(self.data_file))):
        conn = boto.connect_s3()
        try:
            b = conn.get_bucket(self.ds_bucket)
            k = Key(b)
            k.key = self.data_file
            k.get_contents_to_filename(op.join(self.working_dir, s2f(self.data_file)))
        except S3ResponseError:
            self.logger.exception('Master has not generated files')
            raise
        except OSError:
            time.sleep(random.random())
def build():
    utc = datetime.utcnow()
    from_zone = tz.gettz('UTC')
    to_zone = tz.gettz('America/Los_Angeles')
    utc = utc.replace(tzinfo=from_zone)
    la_time = utc.astimezone(to_zone)

    os.chdir(POETROID_PATH)
    os.system("git pull origin master")
    os.chdir(POEMS_PATH)
    os.system("git pull origin master")
    os.chdir(FROZEN_PIE_PATH)
    os.system("../env/bin/python pie.py --config " + POETROID_PATH + os.sep + CONFIG)
    os.chdir(POETROID_PATH)

    print 'Uploading %s to Amazon S3 bucket %s' % (INDEX_HTML, BUCKET_NAME)
    k = Key(BUCKET)
    k.key = 'index.html'
    k.set_contents_from_filename(INDEX_HTML)
    for jsfile in glob(JS_DIR + os.sep + "*.js"):
        k = Key(BUCKET)
        filename = os.path.basename(jsfile)
        k.key = filename
        k.set_contents_from_filename(jsfile)

    update_yaml()
    deploy_time = 'Deployed at ' + str(la_time) + "\n"
    with open(LOG_FILE, "a") as mylog:
        mylog.write(deploy_time)
    return deploy_time
def addPhoto(photo, setTitle):
    url = flickr.photos_getSizes(photo_id=photo.attrib['id'])
    realUrl = None
    for url in url.find('sizes').findall('size'):
        if url.attrib['label'] == "Original":
            realUrl = url.attrib['source']
    if realUrl:
        keyId = setTitle + "/" + photo.attrib['id'] + ".jpg"
        dataKeyId = keyId + ".metadata"

        # Upload photo
        if bucket.get_key(keyId) is None:
            print "%s not found on S3; uploading" % keyId
            f, h = urllib.urlretrieve(realUrl, reporthook=makeFlickrCallback())
            key = Key(bucket)
            key.key = keyId
            print "Uploading %s to %s/%s" % (photo.attrib['title'], bucket.name, key.key)
            key.set_metadata('flickrInfo', key.key + ".metadata")
            key.set_metadata('inFlickrSet', set.attrib['id'])
            key.set_contents_from_filename(f, cb=makeBotoCallback())
            os.unlink(f)

        # Upload metadata
        if bucket.get_key(dataKeyId) is None:
            print "%s not found on S3, setting metadata" % dataKeyId
            photoInfo = flickr.photos_getInfo(photo_id=photo.attrib['id'], format="rest")
            key = Key(bucket)
            key.key = dataKeyId
            key.set_contents_from_string(photoInfo)
def publicUrlTest():
    result = 0
    obj = dsslib.getConnection(CALLER)
    b1 = obj.create_bucket('urlbucket1')
    k = Key(b1)
    k.key = 'obj1'
    k.set_contents_from_string('Data of URL object')
    print "Setting ACL on obj"
    k.set_acl('public-read')
    print "Setting ACL on bucket"
    b1.set_acl('public-read')
    m = Key(b1)
    m.key = 'obj1'
    urlname = m.generate_url(1000)
    print "\nThe obj URL is: " + str(urlname)
    urlname = b1.generate_url(1000)
    print "\nThe bucket URL is: " + str(urlname)
    for i in range(1, 21):
        time.sleep(1)
        if i % 5 == 0:
            print str(20 - i) + " Seconds left before Obj deletion"
    m.delete()
    print "Object deleted\n"
    for i in range(1, 21):
        time.sleep(1)
        if i % 5 == 0:
            print str(20 - i) + " Seconds left before bucket deletion"
    obj.delete_bucket('urlbucket1')
    print "Bucket deleted\n"
    return result
def test_get_all_keys():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    key = Key(bucket)
    key.key = "the-key"
    key.set_contents_from_string("some value")

    key2 = Key(bucket)
    key2.key = "folder/some-stuff"
    key2.set_contents_from_string("some value")

    key3 = Key(bucket)
    key3.key = "folder/more-folder/foobar"
    key3.set_contents_from_string("some value")

    key4 = Key(bucket)
    key4.key = "a-key"
    key4.set_contents_from_string("some value")

    keys = bucket.get_all_keys()
    keys.should.have.length_of(3)
    keys[0].name.should.equal("a-key")
    keys[1].name.should.equal("the-key")

    # Prefix
    keys[2].name.should.equal("folder")

    keys = bucket.get_all_keys(prefix="folder/")
    keys.should.have.length_of(2)
    keys[0].name.should.equal("folder/some-stuff")
    keys[1].name.should.equal("folder/more-folder")
def update_projects():
    conn = S3Connection(AWS_KEY, AWS_SECRET)
    bucket = conn.get_bucket(BUCKET)
    pj_list = Key(bucket)
    pj_list.key = 'projects.json'
    project_list = json.loads(pj_list.get_contents_as_string())
    pj_list.close()

    details = []
    for project_url in project_list:
        try:
            pj_details = update_project(project_url)
        except IOError:
            return 'Github is throttling. Just gonna try again after limit is reset.'
        if pj_details:
            details.append(pj_details)

    pj_details = Key(bucket)
    pj_details.key = 'project_details.json'
    pj_details.set_contents_from_string(json.dumps(details))
    pj_details.set_metadata('Content-Type', 'application/json')
    pj_details.set_acl('public-read')
    pj_details.close()

    people_list = Key(bucket)
    people_list.key = 'people.json'
    people_list.set_contents_from_string(json.dumps(get_people_totals(details)))
    people_list.set_metadata('Content-Type', 'application/json')
    people_list.set_acl('public-read')
    people_list.close()

    org_list = Key(bucket)
    org_list.key = 'organizations.json'
    org_list.set_contents_from_string(json.dumps(get_org_totals(details)))
    org_list.set_metadata('Content-Type', 'application/json')
    org_list.set_acl('public-read')
    org_list.close()

    return 'Updated'
def upload_to_s3(job, job_vars):
    """
    If s3_dir is specified in arguments, file will be uploaded to S3 using boto.
    WARNING: ~/.boto credentials are necessary for this to succeed!

    job_vars: tuple     Tuple of dictionaries: input_args and ids
    """
    import boto
    from boto.s3.key import Key

    input_args, ids = job_vars
    work_dir = job.fileStore.getLocalTempDir()
    uuid = input_args['uuid']
    # Parse s3_dir
    s3_dir = input_args['s3_dir']
    bucket_name = s3_dir.split('/')[0]
    bucket_dir = '/'.join(s3_dir.split('/')[1:])
    # Upload to S3 via boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    if 'error.txt' in ids:
        read_from_filestore(job, work_dir, ids, 'error.txt')
        k.key = os.path.join(bucket_dir, uuid + '.ERROR')
        k.set_contents_from_filename(os.path.join(work_dir, 'error.txt'))
    else:
        read_from_filestore(job, work_dir, ids, 'uuid.tar.gz')
        uuid_tar = os.path.join(work_dir, 'uuid.tar.gz')
        if 'R.fastq' in ids:
            k.key = os.path.join(bucket_dir, uuid + 'single-end' + '.tar.gz')
        else:
            k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
        k.set_contents_from_filename(uuid_tar)
def delete_chop(filename):
    """ Controller to delete a chop post """
    # Query post
    chop = Post.objects.get(filename=filename)
    user = User.objects.get(username=session['username'])
    # If the user session is the submitter or a mod, delete the post;
    # otherwise, redirect with a flash message
    if session['username'] == chop.submitter or user.rank == 'mod':
        # Delete MongoDB post
        chop.delete()
        # Connect to S3 and delete all key objects
        conn = boto.connect_s3(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY)
        b = conn.get_bucket(config.BUCKET_NAME)
        k = Key(b)
        k.key = 'full/' + filename
        b.delete_key(k)
        k.key = 'medium/' + filename
        b.delete_key(k)
        k.key = 'thumbs/' + filename
        b.delete_key(k)
    else:
        flash('You are not the submitter')
        return redirect('/c/%s' % filename)
    flash('Screenchop deleted')
    return redirect(url_for('home'))
def generate_uniqueish_key(s3_settings, environment, name_prefix):
    bucket = get_s3_bucket(s3_settings)
    if name_prefix and name_prefix != '':
        name_base = name_prefix
    else:
        name_base = environment['db_name']
    name_attempt = "{}__{}.dmp.zip".format(name_base, datetime.utcnow().strftime("%Y_%m_%d"))
    key = bucket.get_key(name_attempt)
    if not key:
        key = Key(bucket)
        key.key = name_attempt
        return key
    else:
        counter = 1
        while True:
            counter += 1
            name_attempt = "{}__{}_{}.dmp.zip".format(name_base,
                                                      datetime.utcnow().strftime("%Y_%m_%d"),
                                                      counter)
            if bucket.get_key(name_attempt):
                continue
            else:
                key = Key(bucket)
                key.key = name_attempt
                return key
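# A minimal usage sketch for generate_uniqueish_key() above (not part of the original
# snippet): s3_settings, environment, the 'nightly' prefix and the local dump path are
# all placeholders. The returned boto Key is written with set_contents_from_filename,
# the same call the surrounding snippets use.
key = generate_uniqueish_key(s3_settings, environment, name_prefix='nightly')
key.set_contents_from_filename('/tmp/nightly.dmp.zip')
print 'Uploaded dump as %s' % key.key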
def import_uploads_s3(bucket_name, import_dir, avatar_bucket=False):
    # type: (str, Path, bool) -> None
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    for record in records:
        key = Key(bucket)

        if avatar_bucket:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_hash = user_avatar_hash(record['user_profile_email'])
            key.key = avatar_hash
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        else:
            key.key = record['s3_path']

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm.id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)
def upload_files(method):
    global BUCKET
    files = []
    for file_name in os.listdir(FINISHED_PATH):
        if file_name.endswith('.PNG') or file_name.endswith('.png'):
            files.append(file_name)
    conn = boto.connect_s3(opts.accesskey, opts.secret)
    bucket = conn.create_bucket(BUCKET)
    i = 1
    for file_name in files:
        out(str(i) + '/' + str(len(files)) + ' | Uploading: ' + file_name)
        k = Key(bucket)
        if method == 'overwrite':
            k.key = file_name
        elif method == 'prefix':
            k.key = 'opt_' + file_name
        elif method == 'newdir':
            k.key = 'S3crush/' + file_name
        k.set_contents_from_string(open(FINISHED_PATH + file_name, 'r').read())
        out(str(i) + '/' + str(len(files)) + ' | -> Upload finished: ' + file_name)
        i += 1
def test_key_size_with_validate_keyword():
    """
    Test key.size on boto behavior with validate keyword

    Not validating keys will make key.size = None
    Writing to unvalidated keys should update that object's size
    """
    key_name = 'the-key'
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket("foobar")
    if bucket.get_key(key_name) is not None:
        bucket.delete_key(key_name)

    for string in ['', '0', '0'*5, '0'*10]:
        # test non-existent keys
        bucket.get_key(key_name, validate=False).size.should.be.none
        (lambda: bucket.get_key(key_name, validate=True).size).should.throw(AttributeError)

        key = Key(bucket)
        key.key = key_name
        key.size.should.be.none

        # when writing key, key object updates size
        key.set_contents_from_string(string)
        key.size.should.equal(len(string))

        # validated keys will have size
        bucket.get_key(key_name, validate=True).size.should.equal(len(string))

        # unvalidated keys that do not write do not have size set
        key2 = Key(bucket)
        key2.key = key_name
        key2.size.should.be.none
        bucket.get_key(key_name, validate=False).size.should.be.none

        bucket.delete_key(key_name)
def upload(filename='cacerts.pem'):
    # This method requires the environment variables to be set appropriately:
    # AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_BUCKET_NAME.
    BUCKET_NAME = os.environ['AWS_BUCKET_NAME']
    s3 = S3Connection()
    bucket = s3.get_bucket(BUCKET_NAME)

    # Deploy the CA Bundle to production.
    k = Key(bucket)
    with open(filename, 'rb') as f:
        k.key = hashlib.sha1(f.read()).hexdigest() + '.pem'
    k.set_contents_from_filename(filename)

    # TODO: setup object redirect.
    k = Key(bucket)
    k.key = 'latest.pem'
    k.set_contents_from_filename(filename)
def deploy_front():
    conn = S3Connection()
    if "page" not in sys.argv:
        deploy_static("www.sentimentron.co.uk")
    bucket = conn.get_bucket('www.sentimentron.co.uk')

    front_page = Key(bucket)
    front_page.key = "index.html"
    front_page.set_contents_from_filename("index.html")

    info_page = Key(bucket)
    info_page.key = "info.html"
    info_page.set_contents_from_filename("info.html")

    example_page = Key(bucket)
    example_page.key = "examples.html"
    example_page.set_contents_from_filename("examples.html")

    paths = Key(bucket)
    paths.key = "paths.js"
    paths.set_contents_from_filename("paths.production.js")

    spinner = Key(bucket)
    spinner.key = "spinner.gif"
    spinner.set_contents_from_filename("spinner.gif")
def upload_config(self, config_file):
    """
    Upload to s3 with the following formats:
        s3://screeneasy/config/latest/config.json
        s3://screeneasy/config/date/config.json
    e.g:
        s3://screeneasy/config/latest/config.json
        s3://screeneasy/config/2013/09/11/22-55-08/config.json

    Args:
        config_file string - config file path
    """
    import datetime
    now = datetime.datetime.now()
    bucket = self.s3_bucket
    k = Key(bucket)

    # Overrides latest build
    k.key = 'config/latest/config.json'
    k.set_contents_from_filename(config_file)
    print "uploaded to s3://screeneasy/config/latest/config.json"

    # Upload a copy for archiving purpose
    key_name = 'config/%s/config.json' % now.strftime("%Y/%m/%d/%H-%M-%S")
    k.key = key_name
    k.set_contents_from_filename(config_file)
    print "uploaded to s3://screeneasy/%s" % key_name
def download_jobs(geocoder):
    """
    Download and submit jobs from S3.
    """
    logging.info('Downloading jobs')
    awaiting_folder = 'geocode_awaiting_submission'
    pending_folder = 'geocode_pending_jobs'
    connection = boto.connect_s3()
    bucket = connection.get_bucket(GEO_BUCKET)
    files = bucket.list('%s' % awaiting_folder)
    for f in files:
        try:
            name = f.name.replace('%s/' % awaiting_folder, '')
            fkey = bucket.get_key(f.name)
            email_address = fkey.get_metadata('email')
            if name:
                logging.info('Uploading %s to Bing' % name)
                job_id = geocoder.upload_address_batch(fkey.get_contents_as_string())
                if job_id:
                    logging.info('Moving batch with old id %s to new id %s in %s' % (
                        name, job_id, pending_folder))
                    new_key = Key(bucket)
                    new_key.key = '%s/%s' % (pending_folder, job_id)
                    if email_address:
                        logging.info('Setting metadata to %s' % email_address)
                        new_key.set_metadata('email', email_address)
                        send_email_notification(email_address, {}, name, 'pending')
                    new_key.set_contents_from_string(name)
                    old_key = Key(bucket)
                    old_key.key = '%s/%s' % (awaiting_folder, name)
                    old_key.delete()
                else:
                    send_email_notification(email_address, {}, name, 'error')
        except Exception, e:
            logging.warning('Error uploading %s to Bing: %s' % (name, e))
def publicUrlTest():
    result = 0
    userObj = dssSanityLib.getConnection()
    bucketpref = dssSanityLib.getsNewBucketName()

    b1 = userObj.create_bucket(bucketpref)
    k = Key(b1)
    k.key = 'userObj1'
    k.set_contents_from_string('Data of URL object')

    m = Key(b1)
    m.key = 'userObj1'
    urlname = m.generate_url(1000)
    print "\nThe userObj URL is: " + str(urlname)
    urlname = b1.generate_url(1000)
    print "\nThe bucket URL is: " + str(urlname)

    for i in range(1, 3):
        time.sleep(1)
        if i % 5 == 0:
            print str(2 - i) + " Seconds left before Obj deletion"
    m.delete()
    print "Object deleted\n"

    for i in range(1, 3):
        time.sleep(1)
        if i % 5 == 0:
            print str(2 - i) + " Seconds left before bucket deletion"
    userObj.delete_bucket(bucketpref)
    print "Bucket deleted\n"
    return result
def _getDataFiles(self, file_master=0):
    """
    Retrieves metadata and parsed dataframe files
    (generated by utilities/hddata_process.py) from S3
    """
    comm = self._comm
    working_dir = self._working_dir
    data_source_bucket = self._datasource_bucket

    if comm.rank == file_master:
        if not op.exists(op.join(working_dir, 'metadata.txt')):
            conn = boto.connect_s3()
            b = conn.get_bucket(data_source_bucket)
            k = Key(b)
            k.key = 'metadata.txt'
            k.get_contents_to_filename(op.join(working_dir, 'metadata.txt'))

    if comm.rank == file_master:
        if not op.exists(op.join(working_dir, 'trimmed_dataframe.pandas')):
            conn = boto.connect_s3()
            b = conn.get_bucket(self._working_bucket)
            k = Key(b)
            k.key = 'trimmed_dataframe.pandas'
            k.get_contents_to_filename(op.join(working_dir, 'trimmed_dataframe.pandas'))

    comm.barrier()
def put(self, files): logging.info("S3: putting resources to bucket %s with encodings: %s", self._bucket_name, self._encodings) Key = self._get_key_class() bucket = self._bucket encodings = [None] + list(self._encodings) for f in files: if f['type'] == 'dir': continue elif f['type'] == 'stamp': dist, rpath = _stamp_resource(f['distribution'], f['resource_path'], encodings=self._encodings) target = '/'.join([self._path, dist.project_name, dist.version, rpath]) logging.info("Stamping resource %s:%s in S3: %s", f['distribution_name'], f['resource_path'], target) key = Key(bucket) key.key = target key.set_contents_from_filename( f['filesystem_path'], reduced_redundancy=True, policy='public-read') continue dist = f['distribution'] prefix = '/'.join([self._path, dist.project_name, dist.version]) filename = f['resource_path'].split('/')[-1] mimetype = mimetypes.guess_type(filename)[0] for enc in encodings: headers = {'Cache-Control': 'max-age=32140800'} if mimetype: headers['Content-Type'] = mimetype if enc is None: target = '/'.join([prefix, f['resource_path']]) fs_path = f['filesystem_path'] elif enc == 'gzip': target = '/'.join([prefix, enc, f['resource_path']]) if self._should_gzip(mimetype): headers['Content-Encoding'] = 'gzip' source = f['filesystem_path'] c_file, fs_path = self._get_temp_file() try: file = gzip.GzipFile(filename, 'wb', 9, c_file) try: source = open(source, 'rb') try: file.write(source.read()) finally: source.close() finally: file.close() finally: c_file.close() else: fs_path = f['filesystem_path'] else: raise NotImplementedError() logging.info("putting to S3: %s with headers: %s", target, headers) key = Key(bucket) key.key = target key.set_contents_from_filename( fs_path, reduced_redundancy=True, headers=headers, policy='public-read')
def zipdir(path, ziph, tables):
    for table in tables:
        ziph.write(os.path.join('extracted_csvs', str(tables.index(table)) + 'tables.csv'))
    ziph.write(os.path.join('problem1_log.log'))


zipf = zipfile.ZipFile('Problem1.zip', 'w', zipfile.ZIP_DEFLATED)
zipdir('/', zipf, tables)
zipf.close()
logging.info("csv and log files successfully zipped!")

"""Upload the zip file to AWS S3"""
try:
    zipfile = 'Problem1.zip'
    bucket_name = AWS_ACCESS_KEY_ID.lower() + time.strftime("%y%m%d%H%M%S") + '-dump'
    # bucket_name = AWS_ACCESS_KEY_ID.lower() + '-dump'
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.create_bucket(bucket_name, location=Location.DEFAULT)
    print('bucket created')
    print "Uploading %s to Amazon S3 bucket %s" % (zipfile, bucket_name)
    k = Key(bucket)
    k.key = 'Problem1'
    k.set_contents_from_filename(zipfile)
    print("Zip File successfully uploaded to S3")
except:
    logging.info("Amazon keys or Bucket names are invalid!")
    exit()
def handle(self, *args, **kwargs): filename = '/tmp/database_export.csv' with open(filename, 'w') as csvfile: writer = csv.writer(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL) headers = [ 'Submission ID', 'Application ID', 'Submission date', 'Email', 'Phone number', 'Prefers Email', 'Prefers SMS', 'Org', 'How did you hear', 'Anything else we should know', 'Has Status Updates', 'First Status Update At', 'First Status', 'Last Status Update At', 'Last Status', 'Is Eligible', 'Is Granted' ] writer.writerow(headers) subs = FormSubmission.objects.order_by('id').all() for sub in subs: for app in sub.applications.all(): granted = app.status_updates.filter( status_type__slug='granted').count() > 0 eligible = app.status_updates.filter( status_type__slug='eligible').count() > 0 status_updates = app.status_updates.order_by('-updated') last_status = status_updates.first() first_status = status_updates.last() has_status_updates = status_updates.count() > 0 if has_status_updates: last_status_name = last_status.status_type.display_name last_status_date = last_status.updated.strftime( "%Y-%m-%d") first_status_name = \ first_status.status_type.display_name first_status_date = first_status.updated.strftime( "%Y-%m-%d") else: first_status_name = None first_status_date = None last_status_name = None last_status_date = None columns = [ sub.id, app.id, sub.date_received.strftime("%Y-%m-%d"), sub.email, sub.phone_number, 'prefers_email' in sub.contact_preferences, 'prefers_sms' in sub.contact_preferences, app.organization.name, sub.how_did_you_hear, sub.additional_information, has_status_updates, first_status_date, first_status_name, last_status_date, last_status_name, eligible, granted ] writer.writerow(columns) conn = S3Connection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY) media_bucket = conn.get_bucket(settings.AWS_MEDIA_BUCKET) key = Key(media_bucket) key.key = 'database_export.csv' key.set_contents_from_filename(filename)
def convert_video_to_mp4(instance_id): # -ac 2 -b:v 2000k -c:a aac -c:v libx264 -b:a 160k -vprofile high -bf 0 -strict experimental -f mp4 # print(output_name + ".mp4".replace("media/", "")) #instance.input_video = instance.video from videos.models import VideoModel instance = VideoModel.objects.get(pk=instance_id) ogthumbnail = instance.thumbnail print(ogthumbnail) if ogthumbnail != "vthumbnail.jpg": instance.thumbnail = "vthumbnail.jpg" instance.save() video = instance.video.url.replace("/", "", 1) #video = 'temp/April.mkv' #video = os.path.abspath(instance.video.url) path = instance.video.url # newvideo = convert_video_to_mp4(video, "media/mp4video/" + instance.title filename, file_extension = os.path.splitext(video) #instance.video.url) norm_file_extension = file_extension file_extension = file_extension.lower() filename = 'temp/' + path_leaf(filename).replace('/app/', '') # uncomment # video = '//s3.us-east-2.amazonaws.com/visumic-bucket/media/mp4video/Nas_-_Cherry_Wine_Explicit_ft._Amy_Winehouse.mp4' # video = 'mp4video/' + path_leaf(filename) + file_extension if file_extension == ".mp4": filename = filename #.replace("/", "", 1) + "_V" else: filename = filename #.replace("/", "", 1) # subprocess.call("ffmpeg -i {input} {output}.mp4".format(input=video, output=filename)) #-f mp4 -movflags frag_keyframe+empty_moov if file_extension != ".mp4": subprocess.call("ffmpeg -re -i {input} -f mp4 {output}.mp4".format( input=video, output=filename), shell=True) newvideo = filename + ".mp4" newvideoname = newvideo.replace("temp/", "") videofile = os.path.abspath(newvideo).replace('/app/', '') videoKey = Key(bucket) videoKey.key = 'media/mp4video/' + newvideoname videoKey.set_contents_from_filename(videofile, cb=percent_cb, num_cb=10) instance.video.delete(save=False) instance.video = 'mp4video/' + newvideoname os.remove(newvideo) # instance.video.delete(save=False) # instance.video = os.path.relpath(newvideo, 'media') instance.save() if ogthumbnail == "vthumbnail.jpg": title = instance.title title = path_leaf(filename).replace('/app/', '') # title = title.replace("(", "_") # title = title.replace(")", "_") title = 'temp/' + title + "" + str(randint(0, 100000)) # print(title) # print(video) subprocess.call( "ffmpeg -i {video} -ss 00:00:20 -t 00:00:1 -s 1080x720 -r 1 -f singlejpeg {thumbnail}.jpg" .format(video=instance.video.url.replace("/", "", 1), thumbnail=title), shell=True) thumbnail = title + ".jpg" thumbnailname = thumbnail.replace("temp/", "") thumbnailfile = os.path.abspath(thumbnail).replace('/app/', '') thumbnailKey = Key(bucket) thumbnailKey.key = 'media/thumbnails/' + thumbnailname thumbnailKey.set_contents_from_filename(thumbnailfile, cb=percent_cb, num_cb=10) #shutil.move(thumbnail, 'media/thumbnails/') # os.rename(title, '/media/thumbnails') instance.thumbnail = 'thumbnails/' + thumbnailname os.remove(thumbnail) #instance.thumbnail = os.path.normpath('thumbnails/' + thumbnail) else: instance.thumbnail = ogthumbnail instance.save() #uncomm print("This will print to the screen first") # instance.input_video.delete(False) # video = newvideoname.replace(".mp4", norm_file_extension) # video = "media/mp4video/" + video # uncomm #uncomm # os.remove(video) return instance
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

bucket_name = AWS_ACCESS_KEY_ID.lower() + '-big-data-project'
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
bucket = conn.create_bucket(bucket_name, location=boto.s3.connection.Location.DEFAULT)

testfile = "test.csv"
print ('Uploading %s to Amazon S3 bucket %s' % \
       (testfile, bucket_name))
k = Key(bucket)
k.key = 'test.csv'
k.set_contents_from_filename(testfile, cb=percent_cb, num_cb=10)

predictfile = "predict.csv"
print ('Uploading %s to Amazon S3 bucket %s' % \
       (predictfile, bucket_name))
k = Key(bucket)
k.key = 'predict.csv'
k.set_contents_from_filename(predictfile, cb=percent_cb, num_cb=10)

trainfile = "train.csv"
print ('Uploading %s to Amazon S3 bucket %s' % \
       (trainfile, bucket_name))
def main(): # define global parameters model_type = [ 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', # 'ConvolutionalSF' ] convolution = 'y' filename = "unlabeled_10000.mat" # train # unlabeled # STL_10_lcn_unlabeled.mat.h5 channels = 3 patch_size = 14 n_filters = [ 100, 400, 1600, 6400 # 800, # 1600 ] # # [100, 400, 1600, 6400, 25600] # 1600 # increasing neurons x4 maintains dimensionality dimensions = ( [n_filters[0], channels, 11, 11], [n_filters[1], n_filters[0], 4, 4], [n_filters[2], n_filters[1], 3, 3], [n_filters[3], n_filters[2], 2, 2], # [n_filters[4], n_filters[3], 3, 3] ) # ([n_filters, patch_size * patch_size * channels],) # ([100, 256],) pool = None group = None step = None learn_rate = 0.001 # 0.0001 iterations = [ 3, 3, 2, 2 # 1, # 1 ] # [5, 5, 5] # [50] # [100] verbosity = 0 opt = 'GD' whitening = 'y' test_model = 'y' examples = None batch_size = 100 # 360 # 8000 lcn_kernel = [ 5, 5, 3, 3 ] # these may have to be odd values so that there is a middle aws = 'y' # # # load in data # print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", filename) data = None if filename == 'train.mat' or filename == 'unlabeled_10000.mat': data = loadmat(file_path)['X'] elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5': data = h5py.File(file_path, 'r')['X'] data = np.array(data) data = data.T # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read) print "pre-processing data..." data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:examples, :, :, :] print data.shape if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat': for channel in range(channels): data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :]. reshape((data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), ( data.shape[0], data.shape[2], data.shape[3]) ) # # # determine number of batches # n_batches, rem = divmod(data.shape[0], batch_size) # # # construct the network # print "building model..." # model = sf.Network( # model_type=model_type, # weight_dims=dimensions, # p=pool, # group_size=group, # step=step, # lr=learn_rate, # opt=opt, # c=convolution, # test=test_model, # batch_size=batch_size, # random='y', # weights=None, # lcn_kernel=lcn_kernel # ) # # # compile the training, output, and test functions for the network # print "compiling theano functions..." # train, outputs, test = model.training_functions(data) # # # train the sparse filtering network # print "training network..." 
# start_time = time.time() # cost = {} # weights = {} # for l in xrange(model.n_layers): # # cost_layer = [] # w = None # # # iterate over training epochs # for epoch in xrange(iterations[l]): # # # go though [mini]batches # for batch_index in xrange(n_batches): # # # create index for random [mini]batch # index = np.int32(np.random.randint(data.shape[0], size=batch_size)) # # c, w = train[l](index=index) # cost_layer.append(c) # print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # # # add layer cost and weights to the dictionaries # cost['layer' + str(l)] = cost_layer # weights['layer' + str(l)] = w # # # calculate and display elapsed training time # elapsed = time.time() - start_time # print('Elapsed training time: %f' % elapsed) # # create sub-folder for saved model directory_name = None if aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) # directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" # directory_name = directory_format % time.localtime()[0:6] directory_name = "./saved/2016-01-25_19h17m41s" # os.mkdir(directory_name) # # # save the model for later use # full_path = directory_name + '/model.pkl' # pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) # if aws == 'y': # k.key = full_path # k.set_contents_from_filename(full_path) # os.remove(full_path) # # # save weights separately # savemat(directory_name + '/weights.mat', weights) # if aws == 'y': # k.key = directory_name + '/weights.mat' # k.set_contents_from_filename(directory_name + '/weights.mat') # os.remove(directory_name + '/weights.mat') # # # save the cost functions # savemat(directory_name + '/cost.mat', cost) # if aws == 'y': # k.key = directory_name + '/cost.mat' # k.set_contents_from_filename(directory_name + '/cost.mat') # os.remove(directory_name + '/cost.mat') # # # create log file # log_file = open(directory_name + "/log.txt", "wb") # todo: create log file by looping through args # for m in range(len(model_type)): # log_file.write( # "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, # model_type[m], # dimensions[m], # iterations[m]) # ) # if model == 'GroupSF' or model == 'GroupConvolutionalSF': # log_file.write( # " Groups: %d \n Step: %d" % (group, step) # ) # ex = data.shape[0] # if examples is not None: # ex = examples # # log_file.write( # " Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening) # ) # log_file.write('\nElapsed training time: %f' % elapsed) # log_file.close() # if aws == 'y': # k.key = directory_name + "/log.txt" # k.set_contents_from_filename(directory_name + "/log.txt") # os.remove(directory_name + "/log.txt") ''' ================================ Test the Model ======================================= ''' # todo: train a model and save it; then load in the model and test it so that grid search can be performed # load in the model if aws == 'y': k.key = directory_name + '/model.pkl' # model = k.read(k.key) model = pickle.loads(k.get_contents_as_string()) # open(model, 'rb') # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # test the model if evaluating classification performance if test_model == 'y': print 'testing...' 
from sklearn import svm # set some new local parameters train_data_file = "STL_10_lcn_train.mat" # "train.mat" train_labels_file = "train.mat" test_data_file = "STL_10_lcn_test.mat" # "test.mat" test_labels_file = "test.mat" batch_size = 100 # todo: read in lcn data # load in STL-10 training data (all pre-normalized using LCN) print "loading in training and test data..." file_path = os.path.join(base_path, "data", train_data_file) train_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", train_labels_file) train_labels = loadmat(file_path)['y'] # load in STL-10 test data (all pre-normalized using LCN) file_path = os.path.join(base_path, "data", test_data_file) test_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", test_labels_file) test_labels = loadmat(file_path)['y'] # # preproces training and test data # print "preprocessing training and test data..." # print train_data.shape # train_data = np.float32(train_data.reshape(-1, # 3, # int(np.sqrt(train_data.shape[1] / 3)), # int(np.sqrt(train_data.shape[1] / 3))) # ) # print train_data.shape # for channel in range(channels): # train_data[:, channel, :, :] = np.reshape(scaling.LCNinput(train_data[:, channel, :, :]. # reshape((train_data.shape[0], 1, # train_data.shape[2], # train_data.shape[3])), # kernel_shape=9), ( # train_data.shape[0], # train_data.shape[2], # train_data.shape[3])) # # test_data = np.float32(test_data.reshape(-1, # 3, # int(np.sqrt(test_data.shape[1] / 3)), # int(np.sqrt(test_data.shape[1] / 3))) # ) # for channel in range(channels): # test_data[:, channel, :, :] = np.reshape(scaling.LCNinput(test_data[:, channel, :, :]. # reshape((test_data.shape[0], 1, # test_data.shape[2], # test_data.shape[3])), # kernel_shape=9), ( # test_data.shape[0], # test_data.shape[2], # test_data.shape[3])) # read in the pre-defined fold indices file_path = os.path.join(base_path, "data", "train.mat") fold_indices = loadmat(file_path)['fold_indices'] fold_indices -= np.ones(fold_indices.shape) # make zero-index # train and test a SVM classifier for each layer (including pixels as baseline) accuracy = {} train_input = None test_input = None cm = None c_parameters = [0.02, 0.005, 0.002, 0.001] for layer in range(1, model.n_layers + 1): # range(test_model.n_layers + 1): # skipping pixels for now # create dictionary for layer and list for calculations accuracy['layer' + str(layer)] = {} accuracy_list = [] # create quadrant pooling function based on size of output from layer quadrant_size = test[layer - 1](test_data[0, :, :, :].reshape((1, 3, 96, 96)))[0].shape[3] / 2 print quadrant_size quad_pool = quadrant_pooling(quadrant_size) # loop over pre-defined folds n_folds = fold_indices.shape[1] for fold in xrange(n_folds): # get fold data fold_index = fold_indices[0][fold].astype('int') train_data_fold = np.squeeze(train_data[fold_index]) train_labels_fold = np.squeeze(train_labels[fold_index]) # pixel inputs if layer == 0: if fold == 0: # only get test data once test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]) train_input = train_data_fold.reshape(train_data_fold.shape[0], train_data_fold.shape[1] * train_data_fold.shape[2] * train_data_fold.shape[3]) # hidden layers elif layer > 0: # get the output of the current layer in the model given the training / test data and then reshape # TODO: use raw output as training and testing data? if fold == 0: # only get test data once print "getting test data..." 
test_input = np.zeros((test_data.shape[0], n_filters[layer - 1], 2, 2)) n_batches = test_data.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](test_data[batch_start:batch_end]) temp = temp[0] test_input[batch_start:batch_end] = quad_pool(temp)[0] test_input = test_input.reshape(test_input.shape[0], test_input.shape[1] * test_input.shape[2] * test_input.shape[3]) print "getting training data..." train_input = np.zeros((train_data_fold.shape[0], n_filters[layer - 1], 2, 2)) n_batches = train_data_fold.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](train_data_fold[batch_start:batch_end]) temp = temp[0] train_input[batch_start:batch_end] = quad_pool(temp)[0] train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]) # normalize the inputs for each dimension (zero-mean and unit-variance) if fold == 0: # only normalize test data once test_input -= test_input.mean(axis=1)[:, np.newaxis] test_input /= np.std(test_input, axis=1)[:, np.newaxis] train_input -= train_input.mean(axis=1)[:, np.newaxis] train_input /= np.std(train_input, axis=1)[:, np.newaxis] # train linear support vector machine print("Training linear SVM...") clf = svm.SVC(C=c_parameters[layer - 1], kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples])) # get predictions from SVM and calculate accuracy print("Making predictions...") accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(test_input, test_labels[0:examples]) accuracy_list.append(accuracy['layer' + str(layer)]['fold' + str(fold)]) training_accuracy = clf.score(train_input, np.ravel(train_labels_fold[0:examples])) # display results and log them print("Accuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)])) print "classification performance on training set: %0.4f" % training_accuracy log_file = open(directory_name + "/log_test.txt", "a") log_file.write( "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]) ) log_file.close() # calculate and print out average accuracy and std avg = np.mean(accuracy_list) std = np.std(accuracy_list) print "The overall accuracy of layer %d: %0.4f +/- (%0.4f)" % (layer, float(avg), float(std)) log_file = open(directory_name + "/log_test.txt", "a") log_file.write( "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]) ) log_file.close() # save for aws if aws == 'y': k.key = directory_name + "/log_test.txt" k.set_contents_from_filename(directory_name + "/log_test.txt") # save the test results savemat('accuracy', accuracy)
bucket.configure_lifecycle(lifecycle_cfg)

print("Uploading site files...")

# Only upload the files we need
files = [
    ['./', 'index.html'],
    ['./css/', 'bootstrap.min.css'],
    ['./css/', 'bootstrap-responsive.min.css'],
    ['./css/', 'main.css'],
    ['./js/', 'main.js'],
    ['./js/', 'moment.min.js'],
    ['./js/', 'settings.js'],
    ['./js/vendor/', 'bootstrap.min.js'],
    ['./js/vendor/', 'jquery-1.8.2.min.js'],
    ['./js/vendor/', 'modernizr-2.6.1.min.js'],
    ['./flash/', 'clippy.swf'],
]
matches = []
for root, filename in files:
    file = os.path.join(root, filename)
    k = Key(bucket)
    k.key = file.replace("./", "")
    k.set_contents_from_filename(file, policy=CannedACLStrings[1])
    print(k.key)

website_endpoint = bucket.get_website_endpoint()
print("\nDone! Website deployed to:\n\n\033[1;32mhttp://%s/\033[0m\n" % (website_endpoint))
}

match_data = {}
match_data['date'] = date
match_data['players'] = players
match_data['tournament'] = tournament
match_data['roundNum'] = roundNum
if datetime.datetime.now() >= datetime.datetime(date['year'], date['month'], date['day'],
                                                match_cutoff_time['hour'], match_cutoff_time['minute']):
    match_data['cutoffTime'] = 0
else:
    match_data['cutoffTime'] = match_cutoff_time

match_data = json.dumps(match_data)
print match_data

k = Key(b)
k.key = 'matchData.json'
k.set_contents_from_string(match_data)
k.make_public()

k2 = Key(b)
k2.key = 'archive/2015/' + tournament + '/' + str(roundNum) + '/matchData.json'
k2.set_contents_from_string(match_data)
k2.make_public()
def upload_S3(bucket, dir, file):
    k = Key(bucket)
    k.key = file
    k.set_contents_from_filename(dir + file, cb=percent_cb, num_cb=10)
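# A small usage sketch for upload_S3() above (not part of the original snippet):
# it assumes boto credentials are configured, that a percent_cb progress callback
# such as the one in the next snippet is defined at module scope, and that `dir`
# ends with a path separator, since the S3 key is `file` while the local source
# is dir + file. The bucket and file names below are placeholders.
import boto

conn = boto.connect_s3()
bucket = conn.get_bucket('example-bucket')
upload_S3(bucket, '/tmp/exports/', 'report.csv')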
import boto
import boto.s3
import sys
import json
from boto.s3.key import Key

with open('config.json', 'r') as f:
    config = json.load(f)

bucket_name = 'space-view-test'
conn = boto.connect_s3(config['AWS_ACCESS_KEY_ID'], config['AWS_SECRET_ACCESS_KEY'])
bucket = conn.create_bucket(bucket_name, location=boto.s3.connection.Location.DEFAULT)

testfile = "test.png"
print('Uploading %s to Amazon S3 bucket %s' % (testfile, bucket_name))


def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()


k = Key(bucket)
k.key = 'test'
k.set_contents_from_filename(testfile, cb=percent_cb, num_cb=10)
    host='127.0.0.1',
    port=7480,
    is_secure=False,
    calling_format=boto.s3.connection.OrdinaryCallingFormat())

####################################################
################## TEST CASE #######################

print "\nCreating and populating the bucket for user1..."
b1 = conn_u1.create_bucket('bucket_a')
k = Key(b1)
for i in range(1, 11):
    print "\tCreating obj %d" % (i)
    keyv = 'keynum' + str(i)
    valv = 'Contents of object'
    k.key = keyv
    k.set_contents_from_string(valv)

print "\nSetting ACL..."
##b1.set_acl('public-read')
b1.set_acl('private')

b2 = conn_u2.get_bucket(b1.name)
print "\nU2: Name of this bucket is {b2name}".format(b2name=b2.name)
print "U2: Attempting to read objects from a private bucket:"
m = Key(b2)
for i in range(1, 11):
    keyv = 'keynum' + str(i)
    m.key = keyv
    print "Object " + str(i) + ": " + m.get_contents_as_string()

####################################################
import boto
import uuid

# instantiate new client for S3, uses creds in ~/.aws/credentials (via environment variables)
s3_client = boto.connect_s3()

# uploads to S3 must be to a bucket, which must have a unique name
bucket_name = "keep-tag_report-%s" % uuid.uuid4()
print "The reports will be in bucket: " + bucket_name
my_shiny_new_bucket = s3_client.create_bucket(bucket_name)
print "Created bucket..."

# Object (a.k.a. files): Refer to it by its key (name)
from boto.s3.key import Key

key_aka_nameOfObject = Key(my_shiny_new_bucket)
key_aka_nameOfObject.key = 'volume.tsv'
print "Uploading data to " + bucket_name + " with key: " + key_aka_nameOfObject.key

# Put a bit of data into the object a.k.a. file
key_aka_nameOfObject.set_contents_from_filename("test.tsv")

# use generate_url to make an URL
seconds_to_expire = 240  # you have 4 minutes
print "Making a public URL for the uploaded object. Lives for %d seconds." % seconds_to_expire
print
print key_aka_nameOfObject.generate_url(seconds_to_expire)
print

raw_input("Press enter to delete object and bucket...")
def get_entity_picture(self, entity_id): f = self.db_get( """ SELECT entity.entity_definition_keyname AS definition, file.id AS file_id, file.md5, file.s3_key FROM entity LEFT JOIN ( SELECT p.entity_id, f.id, f.md5, f.s3_key FROM property AS p, property_definition AS pd, file AS f WHERE pd.keyname = p.property_definition_keyname AND f.id = p.value_file AND p.is_deleted = 0 AND p.value_file > 0 AND p.entity_id = %s AND pd.is_deleted = 0 AND pd.dataproperty = 'photo' ORDER BY f.filename LIMIT 1 ) AS file ON file.entity_id = entity.id WHERE entity.id = %s AND entity.is_deleted = 0 LIMIT 1; """, entity_id, entity_id) if not f: return if not f.get('md5') and not f.get('s3_key'): return thumbname = os.path.join(self.settings['files-path'], 'thumbs', self.app_settings('database-name'), '%s' % f.get('file_id')) if os.path.isfile(thumbname): with open(thumbname, 'r') as myfile: filecontent = myfile.read() elif f.get('s3_key'): try: AWS_BUCKET = self.app_settings('auth-s3', '\n', True).split('\n')[0] AWS_ACCESS_KEY = self.app_settings('auth-s3', '\n', True).split('\n')[1] AWS_SECRET_KEY = self.app_settings('auth-s3', '\n', True).split('\n')[2] except Exception, e: return self.json( { 'error': 'Amazon S3 bucket, key or secret not set!', 'time': round(self.request.request_time(), 3), }, 400) s3_conn = S3Connection(AWS_ACCESS_KEY, AWS_SECRET_KEY) s3_bucket = s3_conn.get_bucket(AWS_BUCKET, validate=False) s3_key = Key(s3_bucket) s3_key.key = f.get('s3_key') try: filecontent = self.save_thumb( Image.open(StringIO(s3_key.get_contents_as_string())), thumbname) except Exception, e: return self.json( { 'error': e, 'time': round(self.request.request_time(), 3), }, 404)
    dfMaster[1] = dfMaster[1].astype('int')
    dfMaster[2] = dfMaster[2].astype('int')
    dfMaster[3] = dfMaster[3].astype('int')
    dfMaster[4] = dfMaster[4].astype('int')
    dfMaster[5] = dfMaster[5].astype('int')
    dfMaster[6] = dfMaster[6].astype('int')
    dfMaster[7] = dfMaster[7].astype('int')
    dfMaster[8] = dfMaster[8].astype('int')
    dfMaster[9] = dfMaster[9].astype('int')
    findata = dfMaster.values
    return findata


for el in pic:
    k.key = el
    k.get_contents_to_filename(el)
    findata = pickle.load(open(el, 'rb'))
    findata = typeconvert(findata)

################## Graphs,....
def write_to_s3_priv(filename):
    k = Key(bucket_private)
    k.key = filename
    k.set_contents_from_filename(filename, cb=percent_cb, num_cb=10)
def check_user():
    k = Key(bucket1)
    k.key = 'access.txt'
    return k.get_contents_as_string()
def fetch_link(link): # Fetch our mirrored copy of the given link if available; # if not, mirror and return the original file url = link.download_url # find last mirrored download download = Download.objects.filter( link_class=link.link_class, parameter=link.parameter ).exclude(mirror_s3_key='').order_by('-downloaded_at').first() if download: # existing download was found; fetch it return download.fetch_from_s3() else: # no mirrored copy exists - fetch and mirror the origin file try: blob = fetch_origin_url(url) except (urllib2.URLError, FileTooBig) as ex: Download.objects.create( downloaded_at=datetime.datetime.now(), link_class=link.link_class, parameter=link.parameter, error_type=ex.__class__.__name__ ) raise download = Download( downloaded_at=datetime.datetime.now(), link_class=link.link_class, parameter=link.parameter, sha1=blob.sha1, md5=blob.md5, file_size=blob.file_size, ) # is there already a mirrored link with this sha1? existing_download = Download.objects.filter(sha1=blob.sha1).first() if existing_download: download.mirror_s3_key = existing_download.mirror_s3_key else: key_name = blob.sha1[0:2] + '/' + blob.sha1[2:4] + '/' + blob.sha1[4:16] + '/' + clean_filename(blob.filename) bucket = open_bucket() k = Key(bucket) k.key = key_name k.set_contents_from_string(blob.file_content) download.mirror_s3_key = key_name download.save() if link.is_zip_file(): # catalogue the zipfile contents if we don't have them already if not ArchiveMember.objects.filter(archive_sha1=blob.sha1).exists(): z = blob.as_zipfile() for info in z.infolist(): # zip files do not contain information about the character encoding of filenames. # We therefore decode the filename as iso-8859-1 (an encoding which defines a character # for every byte value) to ensure that it is *some* valid sequence of unicode characters # that can be inserted into the database. When we need to access this zipfile entry # again, we will re-encode it as iso-8859-1 to get back the original byte sequence. ArchiveMember.objects.get_or_create( filename=info.filename.decode('iso-8859-1'), file_size=info.file_size, archive_sha1=blob.sha1) return blob
if len(result) == 0:
    print "No messages available... Exiting"
    sys.exit(1)

# read the message body
m = result[0]
body = m.get_body()
decoded_message = json.loads(body)
decoded_message = json.loads(decoded_message["Message"])
print decoded_message["Key"]

# read the file from the src bucket
src_key = src_bucket_name.get_key(decoded_message["Key"])
src_key.get_contents_to_filename("images/" + decoded_message["Key"])

# apply the black filter to the image
os.system("convert images/" + decoded_message["Key"] + " -monochrome " + decoded_message["Key"])

# upload the image back to the location it was before it was lost
from boto.s3.key import Key
key = Key(target_bucket_name)
key.key = decoded_message["Key"]
key.set_contents_from_filename(decoded_message["Key"])
print "Your lost file has been automatically recreated and uploaded to its proper location"

devops.delete_message(m)
bucket_name = sys.argv[1]

# Create a temporary directory to store local files
tmpdir = tempfile.mkdtemp()

conn = S3Connection()
bucket = conn.get_bucket(bucket_name)

for key in bucket.list(prefix='incoming/'):
    # remove the 'incoming/' prefix (str.strip would also eat matching characters from the name itself)
    filename = key.key[len('incoming/'):]
    print 'Resizing %s' % filename

    # Copy the file to a local temp file
    tmpfile = '%s/%s' % (tmpdir, filename)
    key.get_contents_to_filename(tmpfile)

    # Resize the image with PIL
    orig_image = Image.open(tmpfile)

    # Find the file extension and remove it from filename (keeps the trailing dot)
    file_ext = filename.split('.')[-1]
    base_name = filename[:-len(file_ext)]

    for resolution in IMAGE_SIZES:
        resized_name = '%s%sx%s.%s' % (base_name, resolution[0], resolution[1], file_ext)
        print 'Creating %s' % resized_name
        resized_tmpfile = '%s/%s' % (tmpdir, resized_name)
        resized_image = orig_image.resize(resolution)
        resized_image.save(resized_tmpfile)

        # Copy the resized image to the S3 bucket
        resized_key = Key(bucket)
        resized_key.key = 'processed/%s' % resized_name
        resized_key.set_contents_from_filename(resized_tmpfile)

    # Delete the original file from the bucket
    key.delete()

# Delete the temp dir
shutil.rmtree(tmpdir)
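IMAGE_SIZES and the imports are assumed to be defined near the top of the script; a hypothetical configuration and invocation:

IMAGE_SIZES = [(1024, 768), (640, 480), (320, 240)]   # (width, height) tuples for Image.resize
# run as:  python resize_incoming.py my-photo-bucket   (bucket name is a placeholder)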
import boto
from boto.s3.key import Key
from gzipstream import GzipStreamFile
import warc

if __name__ == '__main__':
    # Let's use a random gzipped web archive (WARC) file from the 2014-15 Common Crawl dataset
    ## Connect to Amazon S3 using anonymous credentials
    conn = boto.connect_s3(anon=True)
    pds = conn.get_bucket('aws-publicdatasets')
    ## Start a connection to one of the WARC files
    k = Key(pds)
    k.key = 'common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00000-ip-10-147-4-33.ec2.internal.warc.gz'
    # The warc library accepts file-like objects, so let's use GzipStreamFile
    f = warc.WARCFile(fileobj=GzipStreamFile(k))
    for num, record in enumerate(f):
        if record['WARC-Type'] == 'response':
            # Imagine we're interested in the URL, the length of content, and any Content-Type strings in there
            print record['WARC-Target-URI'], record['Content-Length']
            print '\n'.join(x for x in record.payload.read().replace('\r', '').split('\n\n')[0].split('\n')
                            if 'content-type:' in x.lower())
            print '=-=-' * 10
        if num > 100:
            break
#!/usr/bin/env python
import os

import boto
from boto.s3.key import Key

home = os.environ['HOME']

# boto accepts the calling format as a dotted class path string
calling_format = boto.config.get('s3', 'calling_format', 'boto.s3.connection.OrdinaryCallingFormat')

s3 = boto.connect_s3(host='localhost', port=10001, calling_format=calling_format, is_secure=False)
b = s3.create_bucket('mockimg')

k_img = Key(b)
k_img.key = 'pics/example.jpg'
k_img.set_contents_from_filename('%s/pics/example.jpg' % home)
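To verify the upload against the same local mock endpoint, the object can be read straight back; a short sketch using the bucket created above:

# Read the object back from the mock server to confirm the upload
k_check = b.get_key('pics/example.jpg')
print k_check.size
k_check.get_contents_to_filename('%s/pics/example_copy.jpg' % home)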
r = requests.get(link)
soup = BeautifulSoup(r.text)
player_table = soup.find(class_='field-table-content')
players = player_table.find_all("p")
for player in players:
    raw_name = player.text
    clean_name = raw_name.split(',')
    clean_name = clean_name[1][1:] + ' ' + clean_name[0]
    field.append(clean_name)
print "field:", len(field)

# get mapping from PGA names to SportsData names
k1 = Key(b)
k1.key = 'playerData/pgaToSportsDataMapping'
player_map = k1.get_contents_as_string()
player_map = json.loads(player_map)

k = Key(b)
k.key = 'sportsData/' + str(year) + '/schedule.json'
schedule_string = k.get_contents_as_string()
schedule = json.loads(schedule_string)

for pga_name in field:
    print pga_name
    if pga_name in ['Ben Crenshaw', 'Matias Dominguez', 'Scott Harvey']:
        continue
    player = player_map['players'][pga_name]
def upload_photo_to_s3(photo_id, **kwargs):
    """Upload a Photo and its generated thumbnails to S3."""
    import os
    import datetime

    from gelder.models import Photo

    photo = Photo.objects.filter(id=photo_id).first()
    if photo is None:
        msg = '(error) in upload_photo_to_s3. cannot find %s with id: %s' % (Photo.__name__, photo_id)
        return {'status': True, 'msg': msg}

    try:
        conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        bucketname = settings.GELDER_S3_PHOTO_BUCKET
        bucket = conn.create_bucket(bucketname)

        # upload the original photo
        k = Key(bucket)
        k.key = photo.basename
        k.set_metadata('checksum', photo.checksum)
        k.set_contents_from_filename(photo.photo.path)
        k.set_acl("public-read")

        # then upload the display-sized version
        thumburl = photo.generate_photo_thumb('event_display')
        k = Key(bucket)
        k.key = '%s%s' % (settings.GELDER_PHOTO_DISPLAY_TAG, photo.basename)
        if os.path.isfile(thumburl):
            k.set_contents_from_filename(thumburl)
            k.set_acl("public-read")

        # then upload the thumbnail
        thumburl = photo.generate_photo_thumb('small_thumb')
        k = Key(bucket)
        k.key = '%s%s' % (settings.GELDER_PHOTO_THUMB_TAG, photo.basename)
        if os.path.isfile(thumburl):
            k.set_contents_from_filename(thumburl)
            k.set_acl("public-read")

        # mark the photo as uploaded
        photo.uploaded = True
        photo.uploaddate = datetime.datetime.now()
        photo.save()

        msg = '(success) uploaded to s3 for photo with key: %s' % photo.basename
        return {'status': True, 'msg': msg}
    except (AWSConnectionError, S3ResponseError, S3PermissionsError, S3CreateError), exc:
        msg = '(error) s3 failed uploading photo with id: %s' % photo_id
        return {'status': False, 'exc': exc, 'msg': msg}
def deleteFile(self, in_file):
    k = Key(self.bucket)
    k.key = in_file
    self.bucket.delete_key(k)
out_file.write(articleContent)
out_file.close()

##################################################################################
# local text files to S3
##################################################################################
AWS_ACCESS_KEY_ID = '<>'
AWS_SECRET_ACCESS_KEY = '<>'
END_POINT = '<>'            # eg. us-east-1
S3_HOST = '<>'              # eg. s3.us-east-1.amazonaws.com
BUCKET = '<>'
BUCKET_DIRECTORY = '<input directory>/'

conn = boto.s3.connect_to_region(END_POINT,
                                 aws_access_key_id=AWS_ACCESS_KEY_ID,
                                 aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                                 host=S3_HOST)

LOCAL_PATH = os.getcwd() + '\\<input directory>\\'
text_files_list = [f for f in os.listdir(LOCAL_PATH) if f.endswith('.txt')]

# open the bucket once, then upload each local .txt file into it
bucket_obj = conn.get_bucket(BUCKET)
for file in text_files_list:
    k = Key(bucket_obj)
    k.key = BUCKET_DIRECTORY + file
    k.set_contents_from_filename(LOCAL_PATH + file)
def upload(self, key, text):
    k = Key(self.bucket)
    k.key = key
    return k.set_contents_from_string(text)
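self.bucket is provided by the surrounding class; a minimal sketch of what that wrapper might look like, assuming only a bucket name and credentials (all names below are placeholders):

# Hypothetical wrapper class around the upload() method above
import boto
from boto.s3.key import Key

class S3Store(object):
    def __init__(self, access_key, secret_key, bucket_name):
        conn = boto.connect_s3(access_key, secret_key)
        self.bucket = conn.get_bucket(bucket_name)

    def upload(self, key, text):
        k = Key(self.bucket)
        k.key = key
        return k.set_contents_from_string(text)

store = S3Store('<access key>', '<secret key>', 'example-bucket')
store.upload('notes/hello.txt', 'hello world')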
keyname = "{0}/{1}/{2}_{3}_{4}.ei.spb".format(flavor, info['taskID'].split('.')[0], info['taskID'], info['jobID'], objID) # convert metada values to strings (numbers make generate_url fail) # do not include guids info2 = {} for k, v in info.iteritems(): if k != 'guids': info2[k] = v.__str__() try: kb = Key(bucket) kb.key = keyname kb.metadata = info2 kb.set_contents_from_filename(fname) kb.set_acl('public-read') if opt.http: url = kb.generate_url(expires_in=0, query_auth=False, force_http=True) except Exception, e: log.info("Unable to store object " + str(e)) raise Exception(e) if opt.http: u = urlparse(url) if u.port is not None: urlx = "{}://{}:{}{}".format(u.scheme, u.hostname, u.port, u.path)
year = 2015

# get tournament schedule from AWS
c = S3Connection('AKIAIQQ36BOSTXH3YEBA', 'cXNBbLttQnB9NB3wiEzOWLF13Xw8jKujvoFxmv3L')
b = c.get_bucket('public.tenthtee')
k = Key(b)
k1 = Key(b)
k2 = Key(b)

rs = b.list()
keys = []
for key in rs:
    keys.append(key.name)

k.key = 'sportsData/' + str(year) + '/schedule.json'
schedule_string = k.get_contents_as_string()
schedule = json.loads(schedule_string)

# get tournament id
for tournament in schedule['tournaments']:
    # uncomment the line below to identify the tournament names
    # print tournament['name'], tournament['id']
    if tournament['name'] == tournament_name:
        break

tournament_id = tournament['id']
def save_to_bucket(bucket, date, region='us-west-2'):
    print('Uploading to S3')
    key = Key(bucket)
    key.key = _key_for_date(date)
    key.set_contents_from_filename('/tmp/scan.jpg')
    print('Completed upload to S3')
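_key_for_date() is not shown here; a plausible implementation, assuming date-based key prefixes, might be:

# Hypothetical _key_for_date() helper assumed by save_to_bucket()
def _key_for_date(date):
    # e.g. scans/2015/06/01/scan.jpg
    return 'scans/%s/scan.jpg' % date.strftime('%Y/%m/%d')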
def value(self, name, key):
    bucket = self.conn.get_bucket(name, validate=False)
    k = Key(bucket)
    k.key = key
    return k.get_contents_as_string()
    # Draw a rectangle around the faces
    if len(faces) > 0:
        # keep only the largest detected face
        faces = sorted(faces, reverse=True, key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
        fx, fy, fw, fh = faces
        roi = frame[fy:fy + fh, fx:fx + fw]
        cv2.rectangle(frame, (fx, fy), (fx + fw, fy + fh), (0, 0, 255), 2)

        # frameClone is assumed to be a copy of frame made earlier in the loop
        cv2.imwrite(file_name, frameClone)

        # upload the captured frame to S3
        k = Key(bucket)
        k.key = file_name
        k.set_contents_from_filename(file_name, cb=percent_cb, num_cb=10)
        time.sleep(10)

    cv2.imshow('your_face', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
video_capture.release()
cv2.destroyAllWindows()
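If the uploaded frame needs to be shared, the same key can produce a time-limited signed URL; a short sketch (the one-hour expiry is an arbitrary choice):

# Generate a temporary signed URL for the frame that was just uploaded
url = k.generate_url(expires_in=3600)
print url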