def handle(self, *args, **options):
    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    b = conn.get_bucket(AWS_STORAGE_BUCKET_NAME)

    for sked in ['e', 'b', 'a']:
        filename = "sked%s.csv" % sked
        local_skedfile = "%s/%s" % (CSV_EXPORT_DIR, filename)
        print "Dumping sked %s to %s" % (sked, local_skedfile)
        dump_all_sked(sked, local_skedfile)

        # need to gzip these
        gzip_cmd = "gzip -f %s" % (local_skedfile)
        filename_zipped = filename + ".gz"
        local_skedfile_zipped = local_skedfile + ".gz"
        # old style os.system just works - subprocess sucks.
        proc = os.system(gzip_cmd)

        s3_path = "%s/%s" % (AWS_BULK_EXPORT_PATH, filename_zipped)
        print "pushing %s to S3: bucket=%s path=%s" % (local_skedfile_zipped, AWS_STORAGE_BUCKET_NAME, s3_path)
        start = time.time()
        k = Key(b)
        k.key = s3_path
        k.set_contents_from_filename(local_skedfile_zipped, policy='public-read')
        elapsed_time = time.time() - start
        print "elapsed time for pushing to s3 is %s" % (elapsed_time)

    # if we didn't die, set the update time
    set_update(BULK_EXPORT_KEY)
def map(self):
    mc = MongoClient('ec2-52-0-148-244.compute-1.amazonaws.com', 27017)
    dbmc = mc.genid
    idoc = dbmc.gentable.find_one_and_update(
        filter={}, update={"$inc": {"score": 1}}, upsert=True)
    k = Key(self.bucket)
    y = stopwords.words('english')
    i = 1
    strx = str(int(idoc['score']))
    strz = None
    filestring = ""
    for line in sys.stdin:
        line = unicode(line, "utf-8", "ignore")
        pattern = re.compile(r'\b(' + r'|'.join(y) + r')\b\s*')
        line = pattern.sub('', line)
        tokenizer = RegexpTokenizer(r'\w+')
        words = tokenizer.tokenize(line)
        strz = strx + 'a' + str(i)
        k.key = strz
        filestring = line + '\n'
        k.set_contents_from_string(filestring)
        for word in words:
            word = word.encode(encoding='UTF-8', errors='ignore')
            print '%s\t%s' % (word.strip(), strz)
        i += 1
def get_file_from_s3(filename):
    conn = S3Connection(ACCESS_KEY, SECRET)
    bucket = conn.get_bucket(BUCKET_NAME)
    k = Key(bucket)
    k.key = filename
    k.get_contents_to_filename(filename)
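# A hedged variant of get_file_from_s3 above that probes for the key first instead
# of letting a missing object raise; ACCESS_KEY, SECRET and BUCKET_NAME are assumed
# to be the same module-level settings used there.
def get_file_from_s3_if_present(filename):
    conn = S3Connection(ACCESS_KEY, SECRET)
    bucket = conn.get_bucket(BUCKET_NAME)
    k = bucket.get_key(filename)  # returns None when the object does not exist
    if k is None:
        return False
    k.get_contents_to_filename(filename)
    return True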
def _upload_attachments_to_s3(self):
    try:
        if settings.EMAIL_QUEUE['aws'].get('attachment_bucket') is None:
            error_logger.error("Attachments require attachment_bucket under settings. Skipping sending attachments")
            return []
        if self._s3_conn is None:
            self._s3_conn = S3Connection(settings.EMAIL_QUEUE['aws']['key'],
                                         settings.EMAIL_QUEUE['aws']['secret'])
        bucket = self._s3_conn.get_bucket(settings.EMAIL_QUEUE['aws']['attachment_bucket'])
        uploaded_attachments = []
        for attachment in self.attachments:
            k = Key(bucket)
            filename = os.path.basename(urlparse.urlsplit(attachment.get('url')).path)
            k.key = filename
            k.set_contents_from_filename(attachment.get('url'))
            if settings.EMAIL_QUEUE['aws'].get('s3-url-endpoint') is None:
                s3_url_endpoint = ("https://s3-" + settings.EMAIL_QUEUE['aws']['region'] +
                                   ".amazonaws.com/" +
                                   settings.EMAIL_QUEUE['aws']['attachment_bucket'] + '/')
            else:
                s3_url_endpoint = settings.EMAIL_QUEUE['aws'].get('s3-url-endpoint')
            s3_uploaded_url = s3_url_endpoint + filename
            uploaded_attachment = copy.deepcopy(attachment)
            uploaded_attachment['url'] = s3_uploaded_url
            uploaded_attachments.append(uploaded_attachment)
        return uploaded_attachments
    except Exception as e:
        raise e
def s3_delete_avatar(id):
    s3conn = boto.connect_s3(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
    bucket = s3conn.get_bucket(S3_BUCKET)
    k = Key(bucket)
    k.key = 'userid-' + str(id)
    k.delete()
def pull_from_s3_and_submit_to_pcp(operation, params):
    statsd.incr("pull_from_s3_and_submit_to_pcp")
    print "pulling from S3"
    params = loads(operation.params)
    video_id = params['video_id']
    workflow = params['workflow']
    video = Video.objects.get(id=video_id)
    ouuid = operation.uuid
    filename = video.filename()
    suffix = video.extension()

    conn = boto.connect_s3(
        settings.AWS_ACCESS_KEY,
        settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(settings.AWS_S3_UPLOAD_BUCKET)
    k = Key(bucket)
    k.key = video.s3_key()

    t = tempfile.NamedTemporaryFile(suffix=suffix)
    k.get_contents_to_file(t)
    t.seek(0)
    operation.log(info="downloaded from S3")

    # TODO: figure out how to re-use submit_to_pcp()
    print "submitting to PCP"
    pcp = PCP(settings.PCP_BASE_URL, settings.PCP_USERNAME,
              settings.PCP_PASSWORD)
    filename = str(ouuid) + suffix
    print "submitted with filename %s" % filename
    title = "%s-%s" % (str(ouuid), strip_special_characters(video.title))
    print "submitted with title %s" % title
    pcp.upload_file(t, filename, workflow, title, video.description)
    return ("submitted", "submitted to PCP")
def save_file_to_s3(operation, params):
    if not waffle.switch_is_active('enable_s3'):
        print "S3 uploads are disabled"
        return ("complete", "S3 uploads temporarily disabled")
    statsd.incr("save_file_to_s3")
    conn = boto.connect_s3(
        settings.AWS_ACCESS_KEY,
        settings.AWS_SECRET_KEY)
    bucket = conn.get_bucket(settings.AWS_S3_UPLOAD_BUCKET)
    k = Key(bucket)
    # make a YYYY/MM/DD directory to put the file in
    source_file = open(params['tmpfilename'], "rb")
    n = datetime.now()
    key = "%04d/%02d/%02d/%s" % (
        n.year, n.month, n.day, os.path.basename(params['tmpfilename']))
    k.key = key
    k.set_contents_from_file(source_file)
    source_file.close()
    f = File.objects.create(video=operation.video, url="", cap=key,
                            location_type="s3",
                            filename=params['filename'],
                            label="uploaded source file (S3)")
    OperationFile.objects.create(operation=operation, file=f)
    return ("complete", "")
def upload_package(self, application_name, package_path):
    bucket = self._connection.get_bucket(application_name)
    key = Key(bucket)
    key.key = os.path.basename(package_path)
    key.set_contents_from_filename(package_path)
    return application_name, key.key
def download_file(bucket, keystr, filename):
    try:
        key = Key(bucket=bucket, name=keystr)
        key.get_contents_to_filename(filename)
    except Exception, e:
        print "Attempted to fetch {} from {} as {}".format(keystr, bucket, filename)
        print "Download failed: {}".format(e)
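# A minimal hedged usage sketch for download_file above; the bucket and key names
# are made up, and credentials are assumed to come from the boto config/environment.
import boto

conn = boto.connect_s3()
bucket = conn.get_bucket('example-bucket', validate=False)
download_file(bucket, 'reports/latest.csv', '/tmp/latest.csv')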
def download_build(build_dir, bucket_name, project_name):
    '''
    Downloads build.zip from the specified S3 bucket and unpacks it
    into the specified build directory.

    @type build_dir: String
    @param build_dir: Build directory

    @type bucket_name: String
    @param bucket_name: Name of the S3 bucket to use

    @type project_name: String
    @param project_name: Name of the project folder inside the S3 bucket
    '''
    # Clear any previous builds
    if os.path.exists(build_dir):
        shutil.rmtree(build_dir)
    os.mkdir(build_dir)

    zip_dest = os.path.join(build_dir, "build.zip")

    conn = S3Connection()
    bucket = conn.get_bucket(bucket_name)
    remote_key = Key(bucket)
    remote_key.name = "%s/build.zip" % project_name
    remote_key.get_contents_to_filename(zip_dest)

    subprocess.check_call(["unzip", zip_dest, "-d", build_dir])
def upload_build(build_file, bucket_name, project_name):
    '''
    Upload the given build zip file to the specified S3 bucket/project directory.

    @type build_file: String
    @param build_file: (ZIP) file containing the build that should be uploaded

    @type bucket_name: String
    @param bucket_name: Name of the S3 bucket to use

    @type project_name: String
    @param project_name: Name of the project folder inside the S3 bucket
    '''
    if not os.path.exists(build_file) or not os.path.isfile(build_file):
        print("Error: Build must be a (zip) file.", file=sys.stderr)
        return

    conn = S3Connection()
    bucket = conn.get_bucket(bucket_name)

    remote_file = "%s/build.zip" % project_name

    remote_key = Key(bucket)
    remote_key.name = remote_file
    print("Uploading file %s -> %s" % (build_file, remote_key.name))
    remote_key.set_contents_from_filename(build_file)
def _download_manifest(self, bucket, download_dir, manifest_name):
    k = Key(bucket)
    k.key = manifest_name
    manifest_local_file = os.path.join(download_dir, manifest_name)
    with open(manifest_local_file, 'wb') as man_file:
        k.get_contents_to_file(man_file)
    return manifest_local_file
def test_key_with_strings(self):
    """
    test simple key 'from_string' and 'as_string' functions
    """
    key_name = "test-key"
    test_string = os.urandom(1024)

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name

    # self.assertFalse(write_key.exists())

    # upload some data
    write_key.set_contents_from_string(test_string)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string, test_string,
                     (len(returned_string), len(test_string)))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def cache_item(payload):
    # "source": "s3://my-bucket/key"
    # "target": "/my-path/key.maybe-extension-too
    # "bucket": "my-bucket"
    # "key": "key"
    print "received request to cache " + payload['bucket'] + '/' + payload['key'] + ' to ' + payload['target']
    bucket = S3_connection.get_bucket(payload['bucket'])
    S3_key = Key(bucket)
    S3_key.key = payload['key']

    target = settings.CACHE_ROOT + payload['target'].decode('utf-8')
    target_path = '/'.join(target.split('/')[0:-1])
    if not os.path.isdir(target_path):
        os.makedirs(target_path)

    if os.path.exists(target):
        print "already exists in cache"
    else:
        S3_key.get_contents_to_filename(target)
        print "downloaded " + payload['key'] + " from s3"
def _upload_file_to_s3(self, bucket_name, keyname, filename, s3_key, s3_secret,
                       s3_url, canned_acl='aws-exec-read'):
    if not has_euca:
        raise Exception("Euca2ools missing.. Required to run this function")
    s3euca = Euca2ool(is_s3=True)
    s3euca.ec2_user_access_key = s3_key
    s3euca.ec2_user_secret_key = s3_secret
    s3euca.url = s3_url

    conn = s3euca.make_connection()
    bucket_instance = _ensure_bucket(conn, bucket_name, canned_acl)
    k = Key(bucket_instance)
    k.key = keyname
    with open(filename, "rb") as the_file:
        try:
            logger.debug("Uploading File:%s to bucket:%s // key:%s" %
                         (filename, bucket_name, keyname))
            k.set_contents_from_file(the_file, policy=canned_acl)
            logger.debug("File Upload complete")
        except S3ResponseError, s3error:
            s3error_string = '%s' % (s3error)
            if s3error_string.find("403") >= 0:
                logger.exception("Permission denied while writing : %s\n%s" %
                                 (k.key, s3error))
def download(self, key_name, filename):
    k = Key(self.bucket)
    k.key = key_name
    k.get_contents_to_filename(filename)
    logger.info('Download %s -> %s', key_name, filename)
def upload(self, key, filename, is_public=False, metadata=None):
    k = Key(self.bucket)
    k.key = key

    headers = {'Cache-Control': 'max-age=31536000'}
    content_type, encoding = mimetypes.guess_type(filename)
    if content_type is not None:
        headers['Content-Type'] = content_type
    if encoding == 'gzip':
        headers['Content-Encoding'] = 'gzip'
    if metadata is not None:
        for key in metadata:
            headers['x-amz-meta-' + key] = metadata[key]

    for _ in xrange(5):
        try:
            k.set_contents_from_filename(
                filename,
                headers=headers,
                policy=('public-read' if is_public else 'private')
            )
            logger.info('Upload %s -> %s', filename, k.name)
            break
        except Exception as e:
            logger.exception(e)
            logger.warn('Try upload again')
    else:
        logger.error('Retry more than 5 times, give it up.')
        raise ExceedMaxRetryError()
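# A hedged usage sketch for the retrying upload() above, assuming `storage` is an
# instance of the class this method belongs to; the file and key names are made up.
storage.upload('images/logo.png', '/tmp/logo.png',
               is_public=True,
               metadata={'source': 'batch-import'})
# This sets the Cache-Control/Content-Type headers plus x-amz-meta-source, and
# retries the upload up to five times before raising ExceedMaxRetryError.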
def upload_parts(self, bucket_instance, directory, parts,
                 part_to_start_from, canned_acl=None,
                 upload_policy=None, upload_policy_signature=None):
    if part_to_start_from:
        okay_to_upload = False
    else:
        okay_to_upload = True

    headers = {}
    if upload_policy:
        headers['S3UploadPolicy'] = upload_policy
    if upload_policy_signature:
        headers['S3UploadPolicySignature'] = upload_policy_signature

    for part in parts:
        if part == part_to_start_from:
            okay_to_upload = True
        if okay_to_upload:
            print 'Uploading part:', part
            k = Key(bucket_instance)
            k.key = part
            part_file = open(os.path.join(directory, part), 'rb')
            try:
                k.set_contents_from_file(part_file, policy=canned_acl,
                                         headers=headers)
            except S3ResponseError, s3error:
                s3error_string = '%s' % s3error
                if s3error_string.find('403') >= 0:
                    msg = 'Permission denied while writing:', k.key
                else:
                    msg = s3error_string
                self.display_error_and_exit(msg)
def load_json(key_name):
    """ Get contents of key as json """
    key = Key(_bucket, key_name)
    contents = key.get_contents_as_string()
    return json.loads(contents)
def upload_to_gs(bucket_name, client_id, client_secret, file, key, acl='public-read'):
    conn = GSConnection(client_id, client_secret, calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)

    # generate key
    filename = secure_filename(file.filename)
    key_dir = key + '/' + generate_hash(key) + '/'
    k.key = key_dir + filename

    # delete old data
    for item in bucket.list(prefix='/' + key_dir):
        item.delete()

    # set object settings
    file_data = file.read()
    file_mime = magic.from_buffer(file_data, mime=True)
    size = len(file_data)
    sent = k.set_contents_from_string(
        file_data,
        headers={
            'Content-Disposition': 'attachment; filename=%s' % filename,
            'Content-Type': '%s' % file_mime
        }
    )
    k.set_acl(acl)

    gs_url = 'https://storage.googleapis.com/%s/' % bucket_name
    if sent == size:
        return gs_url + k.key
    return False
def _read_s3(self):
    if self.snapshots:
        return

    conn = S3Connection(self.aws_access_key_id, self.aws_secret_access_key)
    bucket = conn.get_bucket(self.s3_bucket, validate=False)
    self.snapshots = []

    prefix = self.base_path
    if not self.base_path.endswith('/'):
        prefix = "{!s}/".format(self.base_path)

    snap_paths = [snap.name for snap in bucket.list(prefix=prefix,
                                                    delimiter='/')]
    # Remove the root dir from the list since it won't have a manifest file.
    snap_paths = [x for x in snap_paths if x != prefix]

    for snap_path in snap_paths:
        mkey = Key(bucket)
        manifest_path = '/'.join([snap_path, 'manifest.json'])
        mkey.key = manifest_path
        try:
            manifest_data = mkey.get_contents_as_string()
        except S3ResponseError as e:
            # manifest.json not found.
            logging.warn("Response: {!r} manifest_path: {!r}".format(
                e.message, manifest_path))
            continue
        try:
            self.snapshots.append(
                Snapshot.load_manifest_file(manifest_data, self.s3_bucket))
        except Exception as e:
            # Invalid json format.
            logging.error("Parsing manifest.json failed. {!r}".format(
                e.message))
            continue

    self.snapshots = sorted(self.snapshots, reverse=True)
def if_file_exist(bucketname, filename, aws_access_key="", aws_secret_key=""):
    bucket = get_bucket(bucketname, aws_access_key, aws_secret_key)
    k = Key(bucket, filename)
    if k.exists():
        return True
    else:
        return False
def Seppuku(why):
    # Get the instance ID
    r = requests.get("http://169.254.169.254/latest/meta-data/instance-id")
    if r.status_code != 200:
        wf.logger.logger.error("Seppuku() unable to get instance ID")
        exit(3)
    instance_id = r.text

    # Declare our intent
    wf.logger.logger.error("Seppuku(%s): Instance is stopping because [%s]" % (instance_id, why))

    # Save a copy of the latest syslog to S3
    s3_conn = boto.connect_s3()
    bucket = s3_conn.get_bucket('wf-instance-logs')
    key = Key(bucket)
    key.key = "%s.txt" % instance_id
    wf.logger.logger.error("Seppuku(%s): copying log to %s" % (instance_id, key.generate_url(0)))
    key.set_contents_from_filename('/var/log/syslog')

    # Now commit Seppuku
    ec2_conn = boto.ec2.connect_to_region("us-west-1")
    # this can throw an exception. Protect later.
    ec2_conn.terminate_instances(instance_ids=[instance_id])
    time.sleep(60 * 5)

    # What! No sleep? Then halt
    subprocess.check_call(["sudo", "halt"])
    time.sleep(60 * 5)
    exit(9)
def s3_upload(slug, keyname, absolute_path, bucket, tempdir):
    """
    Upload a file to s3
    """
    conn = _s3conn()
    bucket = conn.get_bucket(bucket)

    mimetype = mimetypes.guess_type(absolute_path)
    options = {'Content-Type': mimetype[0]}

    # There's a possible race condition if files have the same name
    if mimetype[0] is not None and mimetype[0].startswith('text/'):
        upload = open(absolute_path)
        options['Content-Encoding'] = 'gzip'
        key_parts = keyname.split('/')
        filename = key_parts.pop()
        temp_path = os.path.join(tempdir, filename)
        gzfile = gzip.open(temp_path, 'wb')
        gzfile.write(upload.read())
        gzfile.close()
        absolute_path = temp_path

    k = Key(bucket)
    k.key = '%s/%s' % (slug, keyname)
    k.set_contents_from_filename(absolute_path, options, policy='public-read')
def _upload_to_s3(filename):
    if not app.config.get('UPLOAD_SCREENSHOTS_TO_S3', False):
        return

    import boto
    from boto.s3.key import Key
    conn = boto.connect_s3()
    b = conn.get_bucket(app.config['S3_BUCKET'])
    k = Key(b)
    k.key = '{}/{}'.format(
        app.config.get('S3_FILES_PREFIX', 'sleepypuppy'),
        filename
    )
    k.set_contents_from_filename(
        "{}/{}".format(
            app.config['UPLOAD_FOLDER'],
            filename
        )
    )
    os.remove(
        "{}/{}".format(
            app.config['UPLOAD_FOLDER'],
            filename
        )
    )
def s3_key(bucket, gtfsfile):
    k = Key(bucket)
    irequest = gtfsfile.instance_request
    filename = gtfsfile.transload_url.split("/")[-1]
    filename = ".".join(filename.split(".")[:-1])
    k.key = "uploads/%s/%s_%s.zip" % (irequest.id, str(uuid.uuid4()), filename)
    return k
def read_object_content(self, obj_id):
    if not self.conn:
        self.do_connect()
    k = Key(bucket=self.bucket, name=obj_id)
    return k.get_contents_as_string()
def test_key_save_to_missing_bucket():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.get_bucket('mybucket', validate=False)

    key = Key(bucket)
    key.key = "the-key"
    key.set_contents_from_string.when.called_with("foobar").should.throw(S3ResponseError)
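# A hedged companion sketch for the negative test above: the success path under
# moto's mock_s3, assuming the same boto + sure test setup (not part of the
# original suite).
import boto
import sure  # noqa -- enables the .should syntax used above
from moto import mock_s3
from boto.s3.key import Key


@mock_s3
def test_key_save_to_existing_bucket():
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.create_bucket('mybucket')

    key = Key(bucket)
    key.key = "the-key"
    key.set_contents_from_string("foobar")

    bucket.get_key("the-key").get_contents_as_string().should.equal(b"foobar")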
def upload_output_to_s3(job, job_vars):
    """
    If s3_dir is specified in arguments, file will be uploaded to S3 using boto.
    WARNING: ~/.boto credentials are necessary for this to succeed!

    job_vars: tuple     Tuple of dictionaries: input_args and ids
    """
    import boto
    from boto.s3.key import Key

    input_args, ids = job_vars
    work_dir = job.fileStore.getLocalTempDir()
    uuid = input_args['uuid']
    # Parse s3_dir
    s3_dir = input_args['s3_dir']
    bucket_name = s3_dir.split('/')[0]
    bucket_dir = '/'.join(s3_dir.split('/')[1:])
    # I/O
    uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')
    # Upload to S3 via boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket(bucket_name)
    k = Key(bucket)
    k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
    k.set_contents_from_filename(uuid_tar)
def delete_file(bucketname, filename, aws_access_key="", aws_secret_key=""):
    if not if_file_exist(bucketname, filename, aws_access_key, aws_secret_key):
        return
    bucket = get_bucket(bucketname, aws_access_key, aws_secret_key)
    k = Key(bucket)
    k.key = filename
    bucket.delete_key(k)
                          force=False):
    local_file = os.path.join(LOCAL_PATH, local_file)
    # log.debug( "Establishing handle with bucket '%s'..." % bucket_name)
    b = None
    for i in range(0, 5):
        try:
            b = s3_conn.get_bucket(bucket_name)
            break
        except S3ResponseError, e:
            log.error("Problem connecting to bucket '%s', attempt %s/5" % (bucket_name, i))
            time.sleep(2)

    if b is not None:
        # log.debug("Establishing handle with key object '%s'..." % remote_filename)
        k = Key(b, remote_filename)
        if k.exists() and not force:
            log.debug("Remote file '%s' already exists. Not overwriting it." % remote_filename)
            return True
        log.debug("Attempting to save local file '%s' to bucket '%s' as '%s'"
                  % (local_file, bucket_name, remote_filename))
        try:
            k.set_contents_from_filename(local_file)
            log.info("Successfully saved file '%s' to bucket '%s'."
                     % (remote_filename, bucket_name))
            return True
        except S3ResponseError, e:
            log.error("Failed to save file local file '%s' to bucket '%s' as file '%s': %s"
                      % (local_file, bucket_name, remote_filename, e))
def _generate_url(self, package):
    """ Generate a signed url to the S3 file """
    key = Key(self.bucket, self.get_path(package))
    return key.generate_url(self.expire_after)
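# A standalone hedged sketch of what _generate_url above relies on: boto's
# Key.generate_url returns a query-string-signed URL that expires after the given
# number of seconds. The bucket and key names here are made up.
import boto
from boto.s3.key import Key

conn = boto.connect_s3()  # credentials from the environment/boto config
pkg_bucket = conn.get_bucket('example-packages', validate=False)
pkg_key = Key(pkg_bucket, 'mypkg/mypkg-1.0.tar.gz')
signed_url = pkg_key.generate_url(3600)  # valid for one hour; query_auth=True by default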
"USAGE: s3_cache.py <download | upload> <friendly name> <dependencies file> <directory>" ) mode, friendly_name, dependencies_file, directory = argv conn = S3Connection() bucket = conn.lookup(BUCKET_NAME, validate=False) if bucket is None: raise SystemExit("Could not access bucket!") dependencies_file_hash = _sha256_of_file(dependencies_file) key = Key(bucket, dependencies_file_hash) key.storage_class = 'REDUCED_REDUNDANCY' if mode == 'download': download(directory) elif mode == 'upload': if isfile(NEED_TO_UPLOAD_MARKER): # FIXME upload(directory) else:
def exists(self, name):
    name = self._clean_name(name)
    k = Key(self.bucket, name)
    return k.exists()
# <codecell>

### Adding and deleting files from S3

#s3con = S3Connection('<aws access key>', '<aws secret key>')
s3con = S3Connection('AKIAJRV3RN6NXQTSSTBA', '3e212d6rs99xtiPgwKnfN1QD30WZk2hJwCWjMcGc')

# <codecell>

#b = s3con.create_bucket('winteram-boto-example')
b = s3con.get_bucket('wambia660fall2013')

# <codecell>

k = Key(b)
k.key = 'mapper.py'
k.set_contents_from_filename('/Users/winteram/Documents/Teaching/mapper.py')
k.close()

# <codecell>

k = Key(b)
k.key = 'preview_mapper.py'
k.set_contents_from_filename(
    '/Users/winteram/Documents/Teaching/BIA_Fall2013/preview_mapper.py')
k.close()

# <codecell>

k = Key(b)
def main(bucket, inputfile, outputfile, inicio_s, final_s, min_samples_c1, min_samples_c2): conn = boto.connect_s3() b = conn.get_bucket(bucket) k = Key(b) k.key = inputfile contenido = k.get_contents_as_string() decodificada = contenido.decode("utf-8") raw = json.loads(decodificada) ld = pd.DataFrame(raw['locations']) # In[3]: coords = ld[['latitudeE7', 'longitudeE7', 'timestampMs']] coords['timestampMs'] = coords['timestampMs'].apply(pd.to_numeric) # In[6]: #inicio_s="01/08/2016" #final_s="30/03/2017" inicio = 1000 * time.mktime( datetime.datetime.strptime(inicio_s, "%d/%m/%Y").timetuple()) final = 1000 * time.mktime( datetime.datetime.strptime(final_s, "%d/%m/%Y").timetuple()) # In[7]: coords3 = coords[(coords['timestampMs'] > inicio) & (coords['timestampMs'] < final)] # In[8]: coords3.columns = ['lat', 'lon', 'timestamp'] coords3['lat'] = coords3['lat'] / 1e7 coords3['lon'] = coords3['lon'] / 1e7 # In[9]: cosa = coords3[['lat', 'lon']] min_samples = np.max([len(cosa) * min_samples_c1, 700]) scaler = StandardScaler() scaler.fit(cosa) X = scaler.fit_transform(cosa) direcciones = {} kms_per_radian = 6371.0088 epsilon = 1 / kms_per_radian db = DBSCAN(eps=epsilon, min_samples=min_samples, algorithm='ball_tree', metric='haversine').fit(X) # In[10]: labels = db.labels_ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) #direcciones={} df_out = pd.DataFrame(columns=['lat', 'lon', 'type']) # In[11]: if (n_clusters_ > 0): clusters = [X[labels == i] for i in range(n_clusters_)] c0 = scaler.inverse_transform(clusters[0]) c0r = pd.DataFrame(data=c0[0:, 0:]) c0r.columns = ['lat', 'lon'] c0r['cluster'] = 0 for i in range(n_clusters_): c0 = scaler.inverse_transform(clusters[i]) c0r = pd.DataFrame(data=c0[0:, 0:]) c0r.columns = ['lat', 'lon'] lon = np.mean(c0r['lon']) lat = np.mean(c0r['lat']) df_out.loc[i] = [lat, lon, 0] # In[12]: df2 = X[labels == -1] X = df2 min_samples = len(df2) * min_samples_c2 db = DBSCAN(eps=epsilon, min_samples=min_samples, algorithm='ball_tree', metric='haversine').fit(X) labels = db.labels_ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) k.key = outputfile if (n_clusters_ > 0): n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) clusters = [X[labels == i] for i in range(n_clusters_)] c0 = scaler.inverse_transform(clusters[0]) c0r = pd.DataFrame(data=c0[0:, 0:]) c0r.columns = ['lat', 'lon'] c0r['cluster'] = 0 direcciones = {} for i in range(n_clusters_): c0 = scaler.inverse_transform(clusters[i]) c0r = pd.DataFrame(data=c0[0:, 0:]) c0r.columns = ['lat', 'lon'] c0r['cluster'] = i lon = np.mean(c0r['lon']) lat = np.mean(c0r['lat']) df_out.loc[len(df_out)] = [lat, lon, 1] output = df_out.to_csv(encoding='utf-8') k.set_contents_from_string(output) k.make_public() else: #print('acabamos') k.set_contents_from_string('0 clusters') k.make_public()
with open('/home/hduser/.ec2/aws_key') as f:
    lines = f.read().splitlines()
aws_key = lines[0]
print " Using aws_key:", aws_key

from boto.s3.connection import S3Connection
from boto.s3.key import Key

# Initiate a S3 connection using key and secret
conn = S3Connection(aws_id, aws_key)

# The bucket name
bucket = 'home2-0xdiag-datasets'
pb = conn.get_bucket(bucket)

# Make an S3 key using the bucket
k = Key(pb)

file_name_to_use_in_s3 = "%s/%s" % (args.s3_path, os.path.basename(args.local_file_path))
# Set the name of the file to use in S3
# S3 doesn't have the concept of directories
# Use / in the file name to mimic the directory path
k.name = file_name_to_use_in_s3
k.set_metadata(
    'Cache-Control',
    'no-store, no-cache, must-revalidate, post-check=0, pre-check=0')

# Send the file to S3
k.set_contents_from_filename(args.local_file_path)
print " Sent %s to %s" % (args.local_file_path, file_name_to_use_in_s3)
logger.addHandler(hdlr)

# In[6]:

with open('config.json') as data_file:
    data1 = json.load(data_file)

# In[9]:

conn = S3Connection(data1['AWSAccess'], data1['AWSSecret'])

# In[12]:

bucket_name = "assignment2info7390"
initial_file = "zillow_clean.csv"
conn.create_bucket(bucket_name)
existingbucket = conn.get_bucket(bucket_name)
initial_data = Key(existingbucket)
initial_data.key = initial_file
initial_data.set_contents_from_filename(initial_file)

# In[ ]:

logger.info('Successfully uploaded the data to AWS S3 bucket')
for filename in file_list:
    if filename.endswith("link"):
        if "_manifests/revisions/sha256" in filename:
            all_manifests.add(
                re.sub(
                    '.*docker/registry/v2/repositories/.*/_manifests/revisions/sha256/(.*)/link',
                    '\\1', filename))
        elif "_manifests/tags/" in filename and filename.endswith("/current/link"):
            linked_manifest_files.add(filename)

# fetch linked_manifest_files
for filename in linked_manifest_files:
    error = False
    if storage_on_s3:
        k = Key(bucket)
        k.key = filename
        # Get the shasum from the link file
        shasum = k.get_contents_as_string().decode().split(":")[1]
        # Get the manifest json to check if its a manifest list
        k.key = "%s/sha256/%s/%s/data" % (blob_dir, shasum[0:2], shasum)
        try:
            manifest = json.loads(k.get_contents_as_string().decode())
        except Exception as e:
            error = True
            print("Caught error trying to read manifest, ignoring.")
    else:
        shasum = open(filename, 'r').read().split(":")[1]
    print(f'diff: {diff:5d}, rec: {recording}, fps: {fps:0.1f}', end='\r')

# Write it out to a GIF
gif_file = datetime.now().strftime('%Y%m%d_%H%M%S') + '.gif'
pils = [Image.fromarray(images[i][:, :, 0]) for i in range(len(images))]
pils[0].save(gif_file, save_all=True, append_images=pils)

s3_secret_access_key = '<secret access key>'  # NOT A GOOD IDEA
s3_access_key = '<access key>'
region = 'us-east-1'
bucket_name = '<bucket name>'

# Upload to S3
conn = boto.connect_s3(s3_access_key, s3_secret_access_key)
bucket = conn.get_bucket(bucket_name, validate=True)
k = Key(bucket)
k.key = gif_file
sent = k.set_contents_from_file(open(gif_file, 'rb'))
k.set_acl('public-read')

# Send out sms with media embed
account_sid = '<account sid>'
auth_token = '<auth token>'
client = Client(account_sid, auth_token)
message = client.messages \
    .create(
        body="cam trigger",
        media_url=[f'https://{bucket_name}.s3.amazonaws.com/{gif_file}'],
        from_='+<twilio phone>',
        to='+<my phone>'
def upload(file_name):
    k = Key(bucket)
    k.key = file_name
    k.set_contents_from_filename(root_dir + file_name)
    print 'Uploading %s' % root_dir + file_name
    'time_stamp': time()
}

# Write the output to a .json file
tmpFile = '/tmp/space-usage.json'
with open(tmpFile, 'w') as outfile:
    json.dump(data, outfile)

# Run the radosgw-admin command to retrieve the S3 credentials used to
# upload the json file to the bucket.
cmd = ['radosgw-admin', 'user', 'info', '--uid', USER]
p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
ui = p.communicate()[0]
uid = json.loads(ui)
S3_ACCESS_KEY = uid['keys'][0]['access_key']
S3_SECRET_KEY = uid['keys'][0]['secret_key']

ssl._https_verify_certificates(False)

conn = boto.connect_s3(
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY,
    host=HOST,
    port=PORT,
    is_secure=SSL,
    calling_format=boto.s3.connection.OrdinaryCallingFormat(),
)

b = conn.get_bucket(JSONDST)
k = Key(b)
k.key = 'space-usage.json'
k.set_contents_from_filename(tmpFile)
def perform(args): performStart = datetime.now() md5 = None replayDB = None try: sc2reader_to_esdb = SC2ReaderToEsdb() # # at this point the 'hash' may actually be an S3 key like '/uploads/1234-5667-1234234/filename.sc2replay' # or simply '{md5}' # # not to worry, in a few lines, we'll rename the S3 key to be md5.sc2replay # filename = args['hash'] if re.search('.sc2replay', filename, re.IGNORECASE) is None: filename = filename + ".SC2Replay" bucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\ .get_bucket(settings.REPLAY_BUCKET_NAME) # logging.getLogger("jobs").info("trying to get key {}".format(filename)); k = bucket.get_key(filename) replaystring = k.get_contents_as_string() md5 = hashlib.md5(replaystring).hexdigest() # # rename the S3 key to simply be md5.SC2Replay, so it's easier for us to find it # when we need it. # # http://stackoverflow.com/questions/2481685/amazon-s3-boto-how-to-rename-a-file-in-a-bucket k.copy(settings.REPLAY_BUCKET_NAME, md5 + ".SC2Replay", metadata=None, preserve_acl=False) replayDB, blob = sc2reader_to_esdb.processReplay(StringIO(replaystring), args['channel']) if len(blob) > 0: blobbucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\ .get_bucket(settings.BLOB_BUCKET_NAME) k = Key(blobbucket) k.key = "%i" % (replayDB.match.id) blobdump = json.dumps(blob) k.set_contents_from_string(blobdump) except Exception as e: tb = traceback.format_exc() exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] logging.getLogger("jobs").info("parsing failed for replay {}. oh well. exception={}. {} {} {} {}".format(args['hash'].encode('ascii'), e, exc_type, fname, exc_tb.tb_lineno, tb)) pass finally: alldone = datetime.now() # Enqueue ruby PostParse job, always. ResQ(server=settings.REDIS_SERVER).enqueue_from_string('ESDB::Jobs::Sc2::Replay::PostParse', 'replays-high', { 'uuid': args['uuid'], 'hash': md5, 'provider_id': str(args['provider_id']), 'ggtracker_received_at': args['ggtracker_received_at'], 'esdb_received_at': args['esdb_received_at'], 'preparse_received_at': args['preparse_received_at'], 'jobspy_received_at': performStart.strftime('%s.%f'), 'jobspy_done_at': alldone.strftime('%s.%f'), }) # regarding converting times to floating point seconds since the # epoch, using %s above is dangerous because its not python, it # calls the underlying OS. i tried using the solution here: # http://stackoverflow.com/questions/6999726/python-getting-millis-since-epoch-from-datetime/11111177#11111177 # but i ran into timezone issues and did the lazy thing instead. matchId = 0 if replayDB and hasattr(replayDB, "match") and replayDB.match.id: matchId = replayDB.match.id logging.getLogger("jobs").info("all done with match {}. total time in ParseReplay.perform() = {}".format(matchId, alldone - performStart))
def s3_delete(key_path):
    if CHUNKS_FOLDER not in key_path:
        raise Exception("absolutely not deleting %s" % key_path)
    key = Key(_get_bucket(S3_BUCKET), key_path)
    key.delete()
def store(name):
    k = Key(bucket, prefix + name)
    k.set_contents_from_string('somedata')
def upload(self, local, remote, ignoreMissing=False, force=False, hash=None):
    # maybe upload and download should use a trailing slash to indicate that a
    # directory should be uploaded instead of just a file
    assert not remote.startswith("/")
    # assert not local.startswith("/")
    remote_path = os.path.normpath(self.remote_path + "/" + remote)
    local_path = os.path.normpath(os.path.join(self.local_dir, local))

    # cope with the case where local was passed as an abs path
    # local = os.path.relpath(local, self.local_dir)
    # assert not local.startswith("."), "local={}, local_dir={}".format(local, self.local_dir)
    # local_path = local

    uploaded_url = None
    if os.path.exists(local_path):
        if os.path.isfile(local_path):
            # if it's a file, upload it
            uploaded_url = "s3://" + self.bucket.name + "/" + remote_path
            if self.bucket.get_key(remote_path) is None or force:
                key = Key(self.bucket)
                key.name = remote_path
                log.info("Uploading file %s to %s", local, uploaded_url)
                key.set_contents_from_filename(local_path)
                if hash is None:
                    hash = calc_hash(local_path)
                key.set_metadata("sha256", hash)
        else:
            # upload everything in the dir
            assert hash is None
            for fn in os.listdir(local_path):
                full_fn = os.path.join(local_path, fn)
                if os.path.isfile(full_fn):
                    r = os.path.join(remote_path, fn)
                    if self.bucket.get_key(r) is None or force:
                        k = Key(self.bucket)
                        k.key = r
                        log.info("Uploading dir %s (%s to %s)", local_path, fn, fn)
                        k.set_contents_from_filename(full_fn)
                        hash = calc_hash(local_path)
                        k.set_metadata("sha256", hash)
                    else:
                        log.info("Uploading dir %s (%s to %s), skipping existing file", local_path, fn, fn)
    elif not ignoreMissing:
        raise Exception("Could not find {}".format(local))

    return uploaded_url
def save(self):
    conn = boto.connect_s3('the_key', 'the_secret')
    bucket = conn.get_bucket('mybucket')
    k = Key(bucket)
    k.key = self.name
    k.set_contents_from_string(self.value)
def test_ranged_get():
    conn = boto.connect_s3()
    bucket = conn.create_bucket('mybucket')
    key = Key(bucket)
    key.key = 'bigkey'
    rep = b"0123456789"
    key.set_contents_from_string(rep * 10)

    # Implicitly bounded range requests.
    key.get_contents_as_string(headers={'Range': 'bytes=0-'}).should.equal(rep * 10)
    key.get_contents_as_string(headers={'Range': 'bytes=50-'}).should.equal(rep * 5)
    key.get_contents_as_string(headers={'Range': 'bytes=99-'}).should.equal(b'9')

    # Explicitly bounded range requests starting from the first byte.
    key.get_contents_as_string(headers={'Range': 'bytes=0-0'}).should.equal(b'0')
    key.get_contents_as_string(headers={'Range': 'bytes=0-49'}).should.equal(rep * 5)
    key.get_contents_as_string(headers={'Range': 'bytes=0-99'}).should.equal(rep * 10)
    key.get_contents_as_string(headers={'Range': 'bytes=0-100'}).should.equal(rep * 10)
    key.get_contents_as_string(headers={'Range': 'bytes=0-700'}).should.equal(rep * 10)

    # Explicitly bounded range requests starting from the / a middle byte.
    key.get_contents_as_string(headers={'Range': 'bytes=50-54'}).should.equal(rep[:5])
    key.get_contents_as_string(headers={'Range': 'bytes=50-99'}).should.equal(rep * 5)
    key.get_contents_as_string(headers={'Range': 'bytes=50-100'}).should.equal(rep * 5)
    key.get_contents_as_string(headers={'Range': 'bytes=50-700'}).should.equal(rep * 5)

    # Explicitly bounded range requests starting from the last byte.
    key.get_contents_as_string(headers={'Range': 'bytes=99-99'}).should.equal(b'9')
    key.get_contents_as_string(headers={'Range': 'bytes=99-100'}).should.equal(b'9')
    key.get_contents_as_string(headers={'Range': 'bytes=99-700'}).should.equal(b'9')

    # Suffix range requests.
    key.get_contents_as_string(headers={'Range': 'bytes=-1'}).should.equal(b'9')
    key.get_contents_as_string(headers={'Range': 'bytes=-60'}).should.equal(rep * 6)
    key.get_contents_as_string(headers={'Range': 'bytes=-100'}).should.equal(rep * 10)
    key.get_contents_as_string(headers={'Range': 'bytes=-101'}).should.equal(rep * 10)
    key.get_contents_as_string(headers={'Range': 'bytes=-700'}).should.equal(rep * 10)

    key.size.should.equal(100)
import boto
from boto.s3.key import Key

keyId = "AKIASVJC5W6APZJXPRMT"
sKeyId = "pTaagvrPHV/3WvGGkRIC/JQiGKJaHL3kqtXoOWOM"
fileName = 'docker-compose.yml'
bucketName = "finbotaxep"

conn = boto.connect_s3(keyId, sKeyId)
bucket = conn.get_bucket(bucketName)

file = open(fileName)
k = Key(bucket)
k.key = fileName
result = k.set_contents_from_file(file)
def test_bucket_name_with_dot():
    conn = boto.connect_s3()
    bucket = conn.create_bucket('firstname.lastname')

    k = Key(bucket, 'somekey')
    k.set_contents_from_string('somedata')
def main(): # parse options from the command line parser = argparse.ArgumentParser( prog='PROG', formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent('''\ ------------------------------------------------------------------------------------------------------------- This is a deep neural network architecture for training sparse filters. Example uses: $ python test.py $ python test.py -m GroupSF -v 1 -g 3 -s 1 $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100 $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat -i 100 150 -t y -v 1 In the convolutional case, the extra "1" is added automatically for broadcasting. ------------------------------------------------------------------------------------------------------------- ''')) parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type") parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no") parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename") parser.add_argument( "-d", "--dimensions", type=int, nargs='+', default=([100, 256]), help= "the dimensions of the model: [neurons, input size] or [neurons, length, width]" ) parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions") parser.add_argument("-g", "--group", type=int, default=None, help="group size") parser.add_argument("-s", "--step", type=int, default=None, help="step size") parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate") parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations") parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots") parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS") parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'") parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'") parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data") parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples") parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch") parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'") args = parser.parse_args() args.dimensions = parse_dims(args) args.iterations = parse_iter(args) ''' =================================== Load in the data =================================== ''' # load in data print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", args.filename) data = loadmat(file_path)['X'] # reshape and preprocess data print "pre-processing data ..." 
video = None if args.filename == 'patches_video.mat': video = data data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T if args.convolution == 'n': if args.whitening == 'y': data -= data.mean(axis=0) data = whiten(data) elif args.whitening == 'n' and args.channels == 1: data -= data.mean(axis=0) # elif args.whitening == 'n' and args.channels == 3: # data = np.float32(data) data = np.float32(data.T) elif args.convolution == 'y': if args.filename == 'kyotoData.mat': data = np.float32( data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1])))) data = scaling.LCNinput(data, kernel_shape=9) elif args.filename == 'CIFAR_data.mat': data = np.float32( data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1])))) data = scaling.LCNinput(data, kernel_shape=5) data = data[0:args.examples, :, :, :] elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat': data = np.float32( data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:args.examples, :, :, :] args.channels = data.shape[1] for channel in range(args.channels): data[:, channel, :, :] = np.reshape( scaling.LCNinput(data[:, channel, :, :].reshape( (data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), (data.shape[0], data.shape[2], data.shape[3])) # assert that batch size is valid and get number of batches n_batches, rem = divmod(data.shape[0], args.batch_size) assert rem == 0 # other assertions assert len(args.model) == len(args.iterations) if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF': assert args.group is not None assert args.step is not None ''' ============================= Build and train the network ============================= ''' # construct the network print "building model..." model = sf.Network(model_type=args.model, weight_dims=args.dimensions, p=args.pool, group_size=args.group, step=args.step, lr=args.learn_rate, opt=args.opt, c=args.convolution, test=args.test, batch_size=args.batch_size ) # TODO: custom learning rates for each layer # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # train the sparse filtering network print "training network..." 
t = time.time() cost = {} weights = {} for l in xrange(model.n_layers): cost_layer = [] w = None # iterate over training epochs if args.opt == 'GD': for epoch in xrange(args.iterations[l]): # go though [mini]batches for batch_index in xrange(n_batches): c, w = train[l](index=batch_index) cost_layer.append(c) print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) elif args.opt == 'L-BFGS': w = minimize(train[l], model.layers[l].w.eval().flatten(), method='L-BFGS-B', jac=True, options={ 'maxiter': args.iterations[l], 'disp': True }) if args.convolution == 'n': w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1]) elif args.convolution == 'y': w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1], args.dimensions[0][2], args.dimensions[0][3]) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # calculate and display elapsed training time elapsed = time.time() - t print('Elapsed training time: %f' % elapsed) # create sub-folder for saved model if args.aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif args.aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) # save the model for later use full_path = directory_name + '/model.pkl' pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) if args.aws == 'y': k.key = full_path k.set_contents_from_filename(full_path) os.remove(full_path) # save weights separately savemat(directory_name + '/weights.mat', weights) if args.aws == 'y': k.key = directory_name + '/weights.mat' k.set_contents_from_filename(directory_name + '/weights.mat') os.remove(directory_name + '/weights.mat') # create log file log_file = open(directory_name + "/log.txt", "wb") for m in range(len(args.model)): log_file.write( "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, args.model[m], args.dimensions[m], args.iterations[m])) if args.model == 'GroupSF' or args.model == 'GroupConvolutionalSF': log_file.write(" Groups: %d \n Step: %d" % (args.group, args.step)) ex = data.shape[0] if args.examples is not None: ex = args.examples log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening)) log_file.write('\nElapsed training time: %f' % elapsed) log_file.close() if args.aws == 'y': k.key = directory_name + "/log.txt" k.set_contents_from_filename(directory_name + "/log.txt") os.remove(directory_name + "/log.txt") ''' =============================== Verbosity Options ===================================== ''' # get variables and saves if args.verbosity >= 1: # # get variables of interest # activations_norm = {} # activations_raw = {} # activations_shuffled = {} # reconstruction = {} # error_recon = {} # pooled = {} # for l in xrange(len(args.dimensions)): # activations_norm['layer' + str(l)] = {} # activations_raw['layer' + str(l)] = {} # activations_shuffled['layer' + str(l)] = {} # reconstruction['layer' + str(l)] = {} # error_recon['layer' + str(l)] = {} # pooled['layer' + str(l)] = {} for batch in xrange(n_batches): # get variables of interest activations_norm = {} activations_raw = {} activations_shuffled = {} reconstruction = {} error_recon = {} pooled = 
{} # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]() begin = batch * args.batch_size end = begin + args.batch_size f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end]) # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat # activations_raw['layer' + str(l)]['batch' + str(batch)] = f # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled # reconstruction['layer' + str(l)]['batch' + str(batch)] = err # error_recon['layer' + str(l)]['batch' + str(batch)] = rec # pooled['layer' + str(l)]['batch' + str(batch)] = p activations_norm['layer' + str(l) + '_batch' + str(batch)] = f_hat activations_raw['layer' + str(l) + '_batch' + str(batch)] = f activations_shuffled['layer' + str(l) + '_batch' + str(batch)] = f_hat_shuffled reconstruction['layer' + str(l) + '_batch' + str(batch)] = err error_recon['layer' + str(l) + '_batch' + str(batch)] = rec pooled['layer' + str(l) + '_batch' + str(batch)] = p # save model as well as weights and activations separately savemat( directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat', activations_norm) savemat( directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat', activations_raw) if args.aws == 'y': k.key = directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + \ str(batch) + '.mat' k.set_contents_from_filename(directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') os.remove(directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') k.key = directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + \ str(batch) + '.mat' k.set_contents_from_filename(directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') os.remove(directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') # savemat(directory_name + '/weights.mat', weights) # if args.aws == 'y': # k.key = directory_name + '/weights.mat' # k.set_contents_from_filename(directory_name + '/weights.mat') # os.remove(directory_name + '/weights.mat') # # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]() # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size]) # # activations_norm['layer' + str(l)] = f_hat # activations_raw['layer' + str(l)] = f # activations_shuffled['layer' + str(l)] = f_hat_shuffled # reconstruction['layer' + str(l)] = err # error_recon['layer' + str(l)] = rec # pooled['layer' + str(l)] = p # # # save model as well as weights and activations separately # savemat(directory_name + '/weights.mat', weights) # savemat(directory_name + '/activations_norm.mat', activations_norm) # savemat(directory_name + '/activation_raw.mat', activations_raw) # display figures if args.verbosity == 2: # if GD, plot the cost function over time if args.opt == 'GD': visualize.plotCost(cost) # visualize the receptive fields of the first layer visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution, pad=0, examples=None, channels=args.channels) # visualize the distribution of lifetime and population sparseness for l in xrange(len(args.dimensions)): layer = 'layer' + str(l) if args.convolution == 'n': visualize.dispSparseHist(activations_norm[layer], l) elif args.convolution == 'y': visualize.dispSparseHist(activations_shuffled[layer].reshape( args.dimensions[l][0], data.shape[0] * activations_shuffled[layer].shape[2] * activations_shuffled[layer].shape[3]), layer=l) # visualize the 
distribution of activity across the "cortical sheet" and reconstruction if args.filename == 'patches_video.mat': f_hat = activations_norm['layer0'].T.reshape( video.shape[0], video.shape[1], args.dimensions[0][0]) visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1) else: visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution, pad=1, examples=100) # # visualize reconstruction capabilities # if args.convolution == 'n': # visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1) # elif args.convolution == 'y': # visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'], # color='gray', convolution=args.convolution) # print('Reconstructed error: %e' % reconstruction['layer0']) # additional visualizations for convolutional network if args.convolution == 'y': dim = activations_raw['layer0'].shape[2] # visualize an example of a convolved image visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim) # print activations_raw['layer0'] # visualize max-pooled activations and LCN output visualize.visualize_convolved_image( pooled['layer0'][0, :, :, :].reshape( 1, pooled['layer0'].shape[1], pooled['layer0'].shape[2], pooled['layer0'].shape[3]), dim=dim / 2) # visualize an example of a LCNed convolved image after max pooling # temp = activations_raw['layer0'] #[0, :, :, :] temp = pooled['layer0'] #[0, :, :, :] # print temp.shape for i in range(temp.shape[1]): temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape( (1, 1, dim / 2, dim / 2)), kernel_shape=5) # temp = scaling.LCNinput(temp, kernel_shape=5) visualize.visualize_convolved_image(temp, dim=dim / 2) # print temp ''' ================================ Test the Model ======================================= ''' # test the model if evaluating classification performance if args.test == 'y': from sklearn import svm from sklearn.metrics import confusion_matrix train_labels = loadmat(file_path)['y'] file_path = os.path.join(base_path, "data", "CIFAR_test.mat") test_data = loadmat(file_path)['X'] test_labels = loadmat(file_path)['y'] # reshape and normalize the data if args.convolution == 'y': test_data = np.float32( test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])), int(np.sqrt(test_data.shape[1])))) test_data = scaling.LCNinput(test_data, kernel_shape=5) test_data = test_data[0:args.examples, :, :, :] # get SVM test results for pixels to last layer train_input = None for layer in range(model.n_layers + 1): # pixel inputs if layer == 0: test_input = test_data.reshape( test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]) train_input = data.reshape( data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]) # hidden layers elif layer > 0: # get the output of the current layer in the model given the training / test data and then reshape # TODO: use raw output as training and testing data? 
test_input = test[layer - 1](test_data[0:args.batch_size]) test_input = test_input[0].reshape( test_input[0].shape[0], test_input[0].shape[1] * test_input[0].shape[2] * test_input[0].shape[3]) train_input = activations_norm['layer' + str(layer - 1)] train_input = train_input.reshape( train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]) # train linear support vector machine clf = svm.SVC(kernel="linear").fit( train_input, np.ravel(train_labels[0:args.examples])) # get predictions from SVM and calculate accuracy predictions = clf.predict(test_input) accuracy = clf.score(test_input, test_labels[0:args.examples]) # display results and log them print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)) cm = confusion_matrix(test_labels[0:args.examples], predictions) log_file = open(directory_name + "/log.txt", "a") log_file.write("\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy)) log_file.close() # visualize the confusion matrix if args.test == 'y' and args.verbosity == 2: import pylab as pl pl.imshow(cm, interpolation='nearest') pl.title('Confusion Matrix for Network') pl.colorbar() pl.ylabel('True Label') pl.xlabel('Predicted Label') pl.show()
import boto
import time

s3 = boto.connect_s3()
bucket = s3.get_bucket('utabucket')

from boto.s3.key import Key

start_time = time.clock()

k = Key(bucket)
k.key = 'cloud'
k.set_contents_from_filename('C:/Users/kavya dukkipati/Downloads/us-pci.xls')

k1 = Key(bucket)
k1.key = 'aws'
k1.set_contents_from_filename('C:/Users/kavya dukkipati/Downloads/hd2013.csv')

stop_time = time.clock()
time_taken = stop_time - start_time
print(time_taken)
from boto.s3.connection import S3Connection
from boto.s3.key import Key
from dl import main

# set up api
auth = tweepy.OAuthHandler(os.environ.get('CONSUMER_KEY'),
                           os.environ.get('CONSUMER_SECRET'))
auth.set_access_token(os.environ.get('ACCESS_TOKEN'),
                      os.environ.get('ACCESS_SECRET'))
api = tweepy.API(auth)

# import s3 database for line.txt
s3 = S3Connection(os.environ['AWS_ACCESS_KEY_ID'],
                  os.environ['AWS_SECRET_ACCESS_KEY'])
bucket = s3.get_bucket('birdcallbot-assets')
k = Key(bucket)
k.key = 'line.txt'

# read line and add one for later
line = int(k.get_contents_as_string())
k.set_contents_from_string(str(line + 1))

# make a new bird & populate folder with new media
main(line)

# what the bot will tweet
with open('tweet.txt', 'r') as text:
    tweet = text.read()

# update status
media = api.upload_chunked(
    'video.mp4')  # using fitnr fork of tweepy: video_upload2 branch
    # print score_distribution
    # print sum(score_distribution)
    return score_distribution


# create connection to bucket
c = S3Connection('AKIAIQQ36BOSTXH3YEBA', 'cXNBbLttQnB9NB3wiEzOWLF13Xw8jKujvoFxmv3L')

# get the bucket
b = c.get_bucket('public.tenthtee')

# get field from first round tee times
k = Key(b)
k.key = 'sportsData/' + str(year) + '/' + tournament_name + '/field.json'
field_string = k.get_contents_as_string()
field = json.loads(field_string)
# print field

# get hole distribution(s)
k2 = Key(b)
if tournament_name == 'World Golf Championships - Cadillac Championship':
    k2.key = 'sportsData/' + str(year - 1) + '/WGC Cadillac Championship/scores.json'
else:
    k2.key = 'sportsData/' + str(year - 1) + '/' + tournament_name + '/scores.json'
scores_string = k2.get_contents_as_string()
scores = json.loads(scores_string)
def upload_to_s3(file_content, folder_path, name, public=False):
    """
    Uploads given file object to S3.

    :param file_content: Content of file to upload
    :type file_content: str
    :param folder_path: The folder path (not bucket)
    :type folder_path: str
    :param name: Name of S3 key, after folder_path
    :type name: str
    :param public: Set whether it's public-read
    :type public: boolean
    :return: URL used to upload to S3
    :rtype: (str, Key)
    """
    import os
    name = str(name)  # in case filename is number (like candidate id)
    b, c = get_s3_bucket_and_conn()
    k = Key(b)
    key_name = '%s/%s' % (folder_path, os.path.basename(name))
    k.key = key_name
    policy = 'public-read' if public else None
    k.set_contents_from_string(file_content, policy=policy)

    # Set Content-Type headers
    if name.endswith('pdf'):
        # default is application/octet-stream, but doesn't work on Chrome with PDFs
        k.set_metadata('Content-Type', 'application/pdf')
    elif name.endswith('doc'):
        k.set_metadata('Content-Type', 'application/msword')
    elif name.endswith('csv'):
        k.set_metadata('Content-Type', 'text/csv')

    # Query-string authentication
    bucket_name = app.config[TalentConfigKeys.S3_BUCKET_KEY]
    url = c.generate_url(
        expires_in=3600 * 24 * 365,  # expires in 1 year
        method='GET',
        bucket=bucket_name,
        key=k.key,
        query_auth=True
    )
    return url, k
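# A hedged usage sketch for upload_to_s3 above; the file path and key name are
# made up, and get_s3_bucket_and_conn/app.config are assumed to be already configured.
with open('/tmp/resume.pdf', 'rb') as fp:
    signed_url, key = upload_to_s3(fp.read(), 'resumes', 'candidate-1234.pdf', public=False)
# signed_url is a query-string-authenticated GET URL valid for one year; key is the
# boto Key object in case the caller wants to set further metadata.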
def put(self, filedata, content_type, remote_path, force=False):
    now = datetime.datetime.utcnow()
    then = now + datetime.timedelta(self.expiration_days)
    expires = then.strftime("%a, %d %b %Y %H:%M:%S GMT")

    if self.aws_prefix:
        remote_path = "%s/%s" % (self.aws_prefix, remote_path)

    (hexdigest, b64digest) = mediasync.checksum(filedata)
    raw_b64digest = b64digest  # store raw b64digest to add as file metadata

    # create initial set of headers
    headers = {
        "x-amz-acl": "public-read",
        "Content-Type": content_type,
        "Expires": expires,
        "Cache-Control": 'max-age=%d' % (self.expiration_days * 24 * 3600),
    }

    key = self._bucket.get_key(remote_path)
    if key is None:
        key = Key(self._bucket, remote_path)

    key_meta = key.get_metadata('mediasync-checksum') or ''
    s3_checksum = key_meta.replace(' ', '+')
    if force or s3_checksum != raw_b64digest:

        key.set_metadata('mediasync-checksum', raw_b64digest)
        key.set_contents_from_string(filedata, headers=headers, md5=(hexdigest, b64digest))

        # check to see if file should be gzipped based on content_type
        # also check to see if filesize is greater than 1kb
        if content_type in TYPES_TO_COMPRESS:

            key = Key(self._bucket, "%s.gz" % remote_path)

            filedata = mediasync.compress(filedata)
            (hexdigest, b64digest) = mediasync.checksum(filedata)  # update checksum with compressed data
            headers["Content-Disposition"] = 'inline; filename="%sgz"' % remote_path.split('/')[-1]
            headers["Content-Encoding"] = 'gzip'

            key.set_metadata('mediasync-checksum', raw_b64digest)
            key.set_contents_from_string(filedata, headers=headers, md5=(hexdigest, b64digest))

        return True
def main(): ################################################################################ # S3 ################################################################################ s3 = boto.connect_s3( is_secure=True, calling_format=OrdinaryCallingFormat(), ) # Creating a bucket # bucket = s3.create_bucket('benns-new-bucket') # Getting a bucket bucket = s3.get_bucket('benns-new-bucket') # Put something in a bucket k = Key(bucket) k.key = 'item_1' k.set_contents_from_string('This is a test of S3') # Get something from bucket k = Key(bucket) k.key = 'item_1' k.get_contents_as_string() # Check what keys are in the bucket bucket.get_all_keys() ################################################################################ # SQS ################################################################################ # Connect to SQS sqs = boto.connect_sqs() # Connect to S3 s3 = boto.connect_s3() bucket = s3.get_bucket('benns-new-bucket') # Create a queue q = sqs.create_queue('my_message_pump') # Create a new message data = simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) key = bucket.new_key('2010-03-20/%s.json' % str(uuid.uuid4())) key.set_contents_from_string(data) message = q.new_message(body=simplejson.dumps({ 'bucket': bucket.name, 'key': key.name })) q.write(message) # Read the message q = sqs.get_queue('my_message_pump') message = q.read() if message is not None: msg_data = simplejson.loads(message.get_body()) key = boto.connect_s3().get_bucket(msg_data['bucket']).get_key( msg_data['key']) data = simplejson.loads(key.get_contents_as_string()) q.delete_message(message) ################################################################################ # EC2 ################################################################################ # # Configure new stuff # # only needs to be done once # key_pair = ec2.create_key_pair(ec2_key_name) # key_pair.save(ssh_loc) ## Setup new authorization # app.authorize(ip_protocol='tcp', # from_port=port_num, # to_port=port_num, # cidr_ip=cidr_ip) ec2 = boto.ec2.connect_to_region(aws_region) # us-west-2 specific AMI # us-west-1 specific AMI # ami = 'ami-a2490dc2' # reservation = ec2.run_instances(image_id=ami, # key_name=ec2_key_name, # security_group_ids=[security_group_name]) r = ec2.get_all_instances()[0] instance = r.instances[0] instance.ip_address # app = ec2.create_security_group(security_group_name, 'Application tier') app = ec2.get_all_security_groups(groupnames=[security_group_name])[0] # Create New Instance # instance = create_new_instance() # Set Env Host String env.host_string = "ubuntu@%s" % (instance.ip_address) env['key_filename'] = '~/.ssh/ec2-sample-key.pem' # env['hosts'] = ['ec2-52-42-53-34.us-west-2.compute.amazonaws.com'] env['password'] = '******' env['localuser'] = '******' env['use_shell'] = False env['sudo_user'] = '******' env['user'] = '******' env['abort_on_prompts'] = True env.prompts = { 'Is this ok [y/d/N]': 'y', 'Is this ok [y/d/N]:': 'y', 'Is this ok [y/d/N]: ': 'y' } sudo('yum update') # stop instance # ec2.stop_instances([instance.id]) # Begin Installation user = '******' remote_home_dir = '/home/' + user run('echo "{0} ALL=(ALL) ALL" >> /etc/sudoers'.format(env.user)) with settings(warn_only=True): sudo('sh', shell=False) sudo('useradd -U -m %s, shell=false' % user) # Install packages with yum sudo('yum install -y %s' % (" ".join(PACKAGES_LIST))) # Install pip sudo('curl -O http://pypi.python.org/packages/source/p/pip/pip-1.0.tar.gz') run('tar xvfz pip-1.0.tar.gz') sudo('cd pip-1.0 && python setup.py install') # Install 
virtualenv sudo('pip install virtualenv') venv_name = '%s-env' % user venv = os.path.join(remote_home_dir, venv_name) sudo('virtualenv --no-site-packages %s' % venv) # Install python requirements # put('requirements.txt', remote_home_dir, use_sudo=True) sudo('%s/bin/pip install -r %s/requirements.txt' % (venv, remote_home_dir))
def upload_S3(dir, file):
    k = Key(bucket)
    k.key = file
    setPinHigh()
    k.set_contents_from_filename(dir + file, cb=percent_cb, num_cb=10)
    setPinLow()
def save_to_s3(self, image_str):
    expires = datetime.datetime.now() + datetime.timedelta(days=60)
    expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
    k = Key(settings.S3_CONN.get_bucket(settings.S3_ICONS_BUCKET_NAME))
    k.key = self.feed.s3_icons_key
    k.set_metadata('Content-Type', 'image/png')
    k.set_metadata('Expires', expires)
    k.set_contents_from_string(image_str.decode('base64'))
    k.set_acl('public-read')

    self.feed.s3_icon = True
    self.feed.save()
def key(geoid):
    bucket = s3.get_bucket('embed.censusreporter.org')
    keyname = s3_keyname(geoid)
    key = Key(bucket, keyname)
    return key
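# A hedged usage sketch for key() above; the geoid and payload are made up, and
# s3/s3_keyname are assumed to be the module-level helpers the function already uses.
k = key('16000US5367000')
k.set_contents_from_string('{"status": "ok"}',
                           headers={'Content-Type': 'application/json'},
                           policy='public-read')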