def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    return utils.get_bucket(app_config.ASSETS_S3_BUCKET)

def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    # utils.get_bucket establishes the S3 connection itself, so the unused
    # boto.connect_s3() call that used to live here has been dropped.
    return utils.get_bucket(app_config.ASSETS_S3_BUCKET['bucket_name'])

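# Many snippets here lean on a project-level `utils.get_bucket` helper rather
# than connecting to S3 inline. A minimal boto2 sketch of what such a helper
# might look like (hypothetical; the real implementation lives in each
# project's utils module):
import boto

def get_bucket(bucket_name):
    """Connect to S3 and return the named bucket."""
    conn = boto.connect_s3()
    return conn.get_bucket(bucket_name)
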
def update_downloads():
    require('settings', provided_by=['production', 'staging'])

    with open('data/songs.csv') as f:
        rows = csv.DictReader(f)

        for row in rows:
            if not row['download_url']:
                print 'Missing download url'
                continue

            filename = row['download_url'].split('/')[-1]
            print filename

            download_request = requests.get(row['download_url'], stream=True)

            # Write in binary mode; the chunks are raw bytes. Use a separate
            # handle so we don't shadow the CSV file object.
            with open('downloads/%s' % filename, 'wb') as download_file:
                for chunk in download_request.iter_content(chunk_size=1024):
                    if chunk:
                        download_file.write(chunk)
                        download_file.flush()

            bucket = utils.get_bucket(app_config.S3_BUCKET)

            deploy_file(
                bucket,
                'downloads/%s' % filename,
                '%s/downloads/%s' % (app_config.PROJECT_SLUG, filename),
                headers={
                    'Cache-Control': 'max-age=%i' % app_config.ASSETS_MAX_AGE,
                    'Content-Disposition': 'attachment; filename="%s"' % filename
                }
            )

def interactive_comparison():
    """Compare two sentences entered one after the other"""
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("(1) > ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        contexts = []
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)

            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)

            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)

            # Get the output context vector
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)

            # Append the context so we can compute the dot product
            contexts.append(output_context)

            # Display the output
            print("bucket_id: ", bucket_id)
            print("output_context", output_context)

            # Now we compute similarity metrics
            if len(contexts) == 2:
                cosine_distance = cosine_similarity(*contexts)
                euclid_distance = np.linalg.norm(contexts[1] - contexts[0])
                print('cosine_similarity', cosine_distance)
                print('euclid_distance', euclid_distance)
                print('-------------------------------')
                contexts = []  # Start again

            next_sentence = len(contexts) + 1
            print("(%i) > " % next_sentence, end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()

def get_image_exists(imageHash):
    # check if screenshot already exists
    bucket = get_bucket(BUCKET)
    for blob in bucket.list_blobs():
        if imageHash == blob.name:
            logger.info(f"Image {imageHash} already exists")
            return blob.public_url
    return ''

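# google-cloud-storage can look up a single object directly, which avoids
# scanning every blob the way get_image_exists does above. A sketch under the
# same BUCKET/logger assumptions:
def get_image_exists_direct(imageHash):
    bucket = get_bucket(BUCKET)
    blob = bucket.get_blob(imageHash)  # returns None if no such object exists
    if blob is not None:
        logger.info(f"Image {imageHash} already exists")
        return blob.public_url
    return ''
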
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        file_headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(output.getvalue(), file_headers,
                                       policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(src, file_headers, policy='public-read')

def deploy_file(src, dst, max_age):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    headers = {
        'Content-Type': mimetypes.guess_type(src)[0],
        'Cache-Control': 'max-age=%i' % max_age
    }

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(output.getvalue(), headers,
                                       policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(src, headers, policy='public-read')

def delete_folder(dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    for key in bucket.list(prefix='%s/' % dst):
        print 'Deleting %s' % (key.key)
        key.delete()

def delete_folder(bucket_name, dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(bucket_name)

    for key in bucket.list(prefix='%s/' % dst):
        print 'Deleting %s' % (key.key)
        key.delete()

def delete_folder(dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET["bucket_name"])

    for key in bucket.list(prefix="%s/" % dst):
        print "Deleting %s" % (key.key)
        key.delete()

def check_timestamp():
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = '%s/live-data/timestamp.json' % app_config.PROJECT_SLUG
    return k.exists()

def delete_folder(bucket_name, dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(bucket_name)

    for key in bucket.list(prefix='%s/' % dst):
        logger.info('Deleting %s' % (key.key))
        key.delete()

def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        file_headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(
                output.getvalue(), file_headers, policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(
                src, file_headers, policy='public-read')

def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)

            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)

            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)

            # Get output logits for the sentence.
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)

            # Get the output context vector
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)

            # Display the output
            print("bucket_id: ", bucket_id)
            print("output_context", output_context)

            # This is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]

            # If there is an EOS symbol in outputs, cut them at that point.
            if data_utils.EOS_ID in outputs:
                outputs = outputs[:outputs.index(data_utils.EOS_ID)]

            # Print out French sentence corresponding to outputs.
            print(" ".join([tf.compat.as_str(rev_fr_vocab[output])
                            for output in outputs]))
            print("> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()

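# The seq2seq snippets above and below assume a `get_bucket(vocab, sentence)`
# helper that picks the smallest bucket the tokenized sentence fits into. A
# plausible sketch against the tutorial's usual `_buckets` list (hypothetical;
# the real helper may handle over-length sentences differently):
def get_bucket(vocab, sentence):
    token_ids = data_utils.sentence_to_token_ids(
        tf.compat.as_bytes(sentence), vocab)
    return min(b for b in xrange(len(_buckets))
               if _buckets[b][0] > len(token_ids))
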
def check_timestamp():
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = '%s%s/live-data/timestamp.json' % (
        app_config.LIVEBLOG_DIRECTORY_PREFIX, app_config.CURRENT_LIVEBLOG)
    return k.exists()

def get(path):
    bucket = get_bucket(
        {
            "time": "time",
            "loom": "loom",
            "react": "react",
            "ok-help": "ok-help",
            "wiki": "wiki",
        },
        "react-pr153",
    )
    return serve_path(bucket, "/", path)

def encode():
    """Encode all of the sentences to vector form"""
    train, dev, test = loader.getData()
    sentences = []
    tokens = []

    # Load the vocab
    en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)

    # Collect all the training sentences
    for i, row in pd.concat((train, test)).iterrows():
        if isinstance(row["sentence1"], basestring) and isinstance(
                row["sentence2"], basestring):
            sentences.append(row["sentence1"])
            sentences.append(row["sentence2"])

    # Allocate the sentences to buckets
    bucketed = {}
    for sentence in sentences:
        bucket_id = get_bucket(en_vocab, sentence)
        bucketed.setdefault(bucket_id, [])
        bucketed[bucket_id].append(sentence)

    mapped = {}
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = BATCH_SIZE  # We decode 64 sentences at a time.

        # Iterate over each bucket
        for bucket_id, sentences in bucketed.iteritems():
            for batch in chunker(sentences, BATCH_SIZE):
                data = []
                for sentence in batch:
                    token_ids = data_utils.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    expected_output = []
                    data.append((token_ids, expected_output))

                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: data}, bucket_id)
                contexts = model.step_context(sess, encoder_inputs,
                                              decoder_inputs, target_weights,
                                              bucket_id)
                features = np.hstack(contexts)
                print 'Extracted another set of features with shape:', features.shape

                # Now we align sentences with their contexts
                for i, sentence in enumerate(batch):
                    mapped[sentence] = features[i, :].tolist()
                    print sentence
                    print mapped[sentence]

    print "Saving sentences to %s" % JSON_NAME
    with open(JSON_NAME, 'w') as file:
        json.dump(mapped, file)

def check_timestamp():
    """
    Check if a timestamp file exists.
    """
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = 'live-data/timestamp.json'
    return k.exists()

def get(path):
    bucket = get_bucket(
        {
            "time": "time",
            "loom": "loom",
            "react": "react",
            "ok-help": "ok-help",
            "wiki": "wiki",
            "docs": "docs",
            "cs170-website": "cs170-website",
            "cs170": "cs170-website",
            # simple default app for PR testing
            "static-server": "time",
        },
        "time",
    )
    return serve_path(bucket, "/", path)

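# `get_bucket(mapping, default)` in the two routing handlers above appears to
# resolve which static bucket should serve the current request. A minimal
# sketch, assuming the caller passes the requesting subdomain explicitly
# (hypothetical; the real helper likely derives it from the request host):
def resolve_bucket(mapping, default, subdomain=""):
    return mapping.get(subdomain, default)
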
def upload_to_bucket(imageContent, imageHash):
    try:
        bucket = get_bucket(BUCKET)
        logger.info("new image. Uploading screenshot")
        blob = bucket.blob(imageHash)
        blob.upload_from_file(BytesIO(base64.b64decode(imageContent)),
                              content_type="image/png")
        # make public and return url
        blob.make_public()
    except Exception as e:
        raise RuntimeError(
            f"Problems while uploading screenshot. {str(e)}")

    logger.info(f"Screenshot at {blob.public_url}")
    return blob.public_url

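# Example round trip with the two GCS screenshot helpers above (hypothetical
# inputs; `screenshot_base64` is a base64-encoded PNG, `image_hash` its key):
# url = upload_to_bucket(screenshot_base64, image_hash)
# assert get_image_exists(image_hash) == url
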
def get_sentence_to_context_map(sentences):
    """
    Process all of the sentences with the model
    Return a map between sentence text and the context vectors
    The order of the map is undefined due to the bucketing process
    """
    # Load the vocab
    en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)

    # Allocate the sentences to buckets
    bucketed = {}
    for sentence in sentences:
        bucket_id = get_bucket(en_vocab, sentence)
        bucketed.setdefault(bucket_id, [])
        bucketed[bucket_id].append(sentence)

    mapped = {}
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = BATCH_SIZE  # We decode 64 sentences at a time.

        # Iterate over each bucket
        for bucket_id, sentences in bucketed.iteritems():
            for batch in chunker(sentences, BATCH_SIZE):
                data = []

                # Tokenize each sentence
                for sentence in batch:
                    token_ids = data_utils.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    expected_output = []
                    data.append((token_ids, expected_output))

                # Use the model to obtain contexts for each sentence in the batch
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: data}, bucket_id)
                contexts = model.step_context(sess, encoder_inputs,
                                              decoder_inputs, target_weights,
                                              bucket_id)
                features = np.hstack(contexts)
                print 'Encoded {0} sentences into {1} dimensional vectors'.format(
                    *features.shape)

                # Now we align sentences with their contexts
                for i, sentence in enumerate(batch):
                    mapped[sentence] = features[i, :].tolist()

    return mapped

def deploy_folder(bucket_name, src, dst, headers={}, ignore=[]):
    """
    Deploy a folder to S3, checking each file to see if it has changed.
    """
    to_deploy = []

    for local_path, subdirs, filenames in os.walk(src, topdown=True):
        rel_path = os.path.relpath(local_path, src)

        for name in filenames:
            if name.startswith('.'):
                continue

            src_path = os.path.join(local_path, name)

            skip = False
            for pattern in ignore:
                if fnmatch(src_path, pattern):
                    skip = True
                    break
            if skip:
                continue

            if rel_path == '.':
                dst_path = os.path.join(dst, name)
            else:
                dst_path = os.path.join(dst, rel_path, name)

            to_deploy.append((src_path, dst_path))

    if bucket_name == app_config.STAGING_S3_BUCKET:
        public = False
    else:
        public = True

    bucket = utils.get_bucket(bucket_name)
    logger.info(dst)
    for src, dst in to_deploy:
        deploy_file(bucket, src, dst, headers, public=public)

def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if app_config.S3_BUCKET == app_config.STAGING_S3_BUCKET:
        policy = 'private'
    else:
        policy = 'public-read'

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

        if file_headers['Content-Type'] == 'text/html':
            # Force character encoding header
            file_headers['Content-Type'] = '; '.join([
                file_headers['Content-Type'], 'charset=utf-8'])

    with open(src, 'rb') as f:
        local_md5 = hashlib.md5()
        local_md5.update(f.read())
        local_md5 = local_md5.hexdigest()

    if local_md5 == s3_md5:
        print 'Skipping %s (has not changed)' % src
    else:
        print 'Uploading %s --> %s' % (src, dst)
        k.set_contents_from_filename(src, file_headers, policy=policy)

def rm(path):
    """
    Remove an asset from s3 and locally
    """
    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)

    file_list = glob(path)

    found_folder = True

    # Add files in folders, instead of folders themselves (S3 doesn't have
    # folders). The outer loop repeats until a pass finds no directories,
    # which compensates for mutating file_list while iterating over it.
    while found_folder:
        found_folder = False

        for local_path in file_list:
            if os.path.isdir(local_path):
                found_folder = True
                file_list.remove(local_path)

                for path in os.listdir(local_path):
                    file_list.append(os.path.join(local_path, path))

    if len(file_list) > 0:
        utils.confirm("You are about to destroy %i files. Are you sure?" % len(file_list))

        for local_path in file_list:
            logger.info(local_path)

            if os.path.isdir(local_path):
                file_list.extend(os.listdir(local_path))
                continue

            key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG, 1)
            key = bucket.get_key(key_name)

            _assets_delete(local_path, key)

def _check_slug(slug):
    """
    Does slug exist in graphics folder or production s3 bucket?
    """
    graphic_path = '%s/%s' % (app_config.GRAPHICS_PATH, slug)
    if os.path.isdir(graphic_path):
        print 'Error: Directory already exists'
        return True

    try:
        bucket = utils.get_bucket(app_config.PRODUCTION_S3_BUCKET['bucket_name'])
        key = bucket.get_key('%s/graphics/%s/child.html' % (app_config.PROJECT_SLUG, slug))
        if key:
            print 'Error: Slug exists on apps.npr.org'
            return True
    except boto.exception.NoAuthHandlerFound:
        print 'Could not authenticate, skipping Amazon S3 check'
    except boto.exception.S3ResponseError:
        print 'Could not access S3 bucket, skipping Amazon S3 check'

    return False

def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if app_config.S3_BUCKET == app_config.STAGING_S3_BUCKET:
        policy = 'private'
    else:
        policy = 'public-read'

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

        if file_headers['Content-Type'] == 'text/html':
            # Force character encoding header
            file_headers['Content-Type'] = '; '.join(
                [file_headers['Content-Type'], 'charset=utf-8'])

    with open(src, 'rb') as f:
        local_md5 = hashlib.md5()
        local_md5.update(f.read())
        local_md5 = local_md5.hexdigest()

    if local_md5 == s3_md5:
        print 'Skipping %s (has not changed)' % src
    else:
        print 'Uploading %s --> %s' % (src, dst)
        k.set_contents_from_filename(src, file_headers, policy=policy)

def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    return utils.get_bucket(app_config.ASSETS_S3_BUCKET['bucket_name'])

assert not np.any(np.isnan(imgs))

img_batches = minibatch(imgs, 32, 1000)
label_batches = minibatch(gazes, 32, 1000)

for images, labels in zip(img_batches, label_batches):
    # Calculate batch loss and predicted locations. Fetch into a new name
    # (pred_locs) so the `locs` tensor isn't overwritten by a numpy array,
    # which would break sess.run on the next iteration.
    loss, pred_locs = sess.run([cost, locs],
                               feed_dict={x: images, y: labels, keep_prob: 1.})

    # A prediction counts as correct when it lands in the same grid cell
    # as the ground-truth gaze location.
    acc = np.sum(
        np.array([
            get_bucket(4, expected[0], expected[1], 244, 244) ==
            get_bucket(4, actual[0], actual[1], 244, 244)
            for expected, actual in zip(labels, pred_locs)
        ])) / len(pred_locs)

    avg_acc += acc
    avg_loss += loss
    nums += 1

avg_acc /= nums
avg_loss /= nums

print("Epoch " + str(epoch) + ", Minibatch Loss= " +
      "{:.6f}".format(avg_loss) + ", Training Accuracy= " +
      "{:.5f}".format(avg_acc))

# Save model
save_path = saver.save(sess, "loc_model_mse.ckpt")
print("Model saved in file: %s" % save_path)

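# The accuracy check above assumes a `get_bucket(n, x, y, width, height)`
# helper that maps a coordinate onto a cell of an n-by-n grid. A plausible
# sketch (hypothetical; the real cell ordering may differ):
def get_bucket(n, x, y, width, height):
    col = min(int(x * n / width), n - 1)
    row = min(int(y * n / height), n - 1)
    return row * n + col
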
def sync():
    """
    Intelligently synchronize assets between S3 and local folder.
    """
    ignore_globs = []

    with open('%s/assetsignore' % ASSETS_ROOT, 'r') as f:
        ignore_globs = [l.strip() for l in f]

    local_paths = []
    not_lowercase = []

    for local_path, subdirs, filenames in os.walk(ASSETS_ROOT):
        for name in filenames:
            full_path = os.path.join(local_path, name)
            glob_path = full_path.split(ASSETS_ROOT)[1].strip('/')

            ignore = False

            for ignore_glob in ignore_globs:
                if fnmatch(glob_path, ignore_glob):
                    ignore = True
                    break

            if ignore:
                logger.info('Ignoring: %s' % full_path)
                continue

            if name.lower() != name:
                not_lowercase.append(full_path)

            local_paths.append(full_path)

    # Prevent case sensitivity differences between OSX and S3 from screwing us up
    if not_lowercase:
        logger.error(
            'The following filenames are not lowercase, please change them before running `assets.sync`:'
        )

        for name in not_lowercase:
            logger.error(name)

        return

    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)
    keys = bucket.list('%s/' % app_config.ASSETS_SLUG)

    which = None
    always = False

    for key in keys:
        download = False
        upload = False

        local_path = key.name.replace(app_config.ASSETS_SLUG, ASSETS_ROOT, 1)

        # Skip root key
        if local_path == '%s/' % ASSETS_ROOT:
            continue

        logger.info(local_path)

        if local_path in local_paths:
            # A file can only exist once, this speeds up future checks
            # and provides a list of non-existing files when complete
            local_paths.remove(local_path)

            # We need an actual key, not a "list key"
            # http://stackoverflow.com/a/18981298/24608
            key = bucket.get_key(key.name)

            with open(local_path, 'rb') as f:
                local_md5 = key.compute_md5(f)[0]

            # Hashes are different
            if key.get_metadata('md5') != local_md5:
                if not always:
                    # Ask user which file to take
                    which, always = _assets_confirm(local_path)

                if not which:
                    logger.info('Cancelling!')
                    return

                if which == 'remote':
                    download = True
                elif which == 'local':
                    upload = True
        else:
            download = True

        if download:
            _assets_download(key, local_path)

        if upload:
            _assets_upload(local_path, key)

    action = None
    always = False

    # Iterate over files that didn't exist on S3
    for local_path in local_paths:
        key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG, 1)
        key = bucket.get_key(key_name, validate=False)

        logger.info(local_path)

        if not always:
            action, always = _assets_upload_confirm()

        if not action:
            logger.info('Cancelling!')
            return

        if action == 'upload':
            _assets_upload(local_path, key)
        elif action == 'delete':
            _assets_delete(local_path, key)

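# sync() assumes an `_assets_upload` that stamps each key with the local
# file's md5 so later runs can compare it against `key.get_metadata('md5')`.
# A minimal boto2 sketch of such a helper (hypothetical):
def _assets_upload(local_path, key):
    with open(local_path, 'rb') as f:
        md5 = key.compute_md5(f)[0]
    key.set_metadata('md5', md5)
    key.set_contents_from_filename(local_path, policy='public-read')
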
tweet1 = api.update_status(status=tweet1_dict["tweet_text"] +
                           "\n\nWho is dying: Who is vaccinated:",
                           media_ids=media_ids)

# second tweet
gif_id = api.media_upload(tweet2_dict["gif_path"]).media_id_string
api.create_media_metadata(media_id=gif_id, alt_text=tweet2_dict["alt_text"])

tweet2 = api.update_status(
    in_reply_to_status_id=tweet1.id,
    status=tweet1_dict["tweet_text"],
    media_ids=[gif_id],
)

# third tweet
tweet3_status = '''
Read the latest on Chicago's widening vaccine disparity from @maerunes for @SouthSideWeekly: https://southsideweekly.com/chicagos-vaccine-disparity-widens/
'''

tweet3 = api.update_status(
    in_reply_to_status_id=tweet2.id,
    status=tweet3_status,
)

# upload latest files to Google Cloud for embeds
bucket = get_bucket("chivaxbot", GOOGLE_APPLICATION_CREDENTIALS)

gcloud_uploads = [
    "deaths_map_path_latest",
    "vax_map_path_latest",
    "sentence_path_latest",
]

for path in gcloud_uploads:
    upload_to_gcloud(bucket, tweet1_dict[path])

def sync():
    """
    Intelligently synchronize assets between S3 and local folder.
    """
    ignore_globs = []

    with open('%s/assetsignore' % ASSETS_ROOT, 'r') as f:
        ignore_globs = [l.strip() for l in f]

    local_paths = []
    not_lowercase = []

    for local_path, subdirs, filenames in os.walk(ASSETS_ROOT):
        for name in filenames:
            full_path = os.path.join(local_path, name)
            glob_path = full_path.split(ASSETS_ROOT)[1].strip('/')

            ignore = False

            for ignore_glob in ignore_globs:
                if fnmatch(glob_path, ignore_glob):
                    ignore = True
                    break

            if ignore:
                print 'Ignoring: %s' % full_path
                continue

            if name.lower() != name:
                not_lowercase.append(full_path)

            local_paths.append(full_path)

    # Prevent case sensitivity differences between OSX and S3 from screwing us up
    if not_lowercase:
        print 'The following filenames are not lowercase, please change them before running `assets.sync`:'

        for name in not_lowercase:
            print '    %s' % name

        return

    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)
    keys = bucket.list(app_config.ASSETS_SLUG)

    which = None
    always = False

    for key in keys:
        download = False
        upload = False

        local_path = key.name.replace(app_config.ASSETS_SLUG, ASSETS_ROOT, 1)

        # Skip root key
        if local_path == '%s/' % ASSETS_ROOT:
            continue

        print local_path

        if local_path in local_paths:
            # A file can only exist once, this speeds up future checks
            # and provides a list of non-existing files when complete
            local_paths.remove(local_path)

            # We need an actual key, not a "list key"
            # http://stackoverflow.com/a/18981298/24608
            key = bucket.get_key(key.name)

            with open(local_path, 'rb') as f:
                local_md5 = key.compute_md5(f)[0]

            # Hashes are different
            if key.get_metadata('md5') != local_md5:
                if not always:
                    # Ask user which file to take
                    which, always = _assets_confirm(local_path)

                if not which:
                    print 'Cancelling!'
                    return

                if which == 'remote':
                    download = True
                elif which == 'local':
                    upload = True
        else:
            download = True

        if download:
            _assets_download(key, local_path)

        if upload:
            _assets_upload(local_path, key)

    action = None
    always = False

    # Iterate over files that didn't exist on S3
    for local_path in local_paths:
        key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG, 1)
        key = bucket.get_key(key_name, validate=False)

        print local_path

        if not always:
            action, always = _assets_upload_confirm()

        if not action:
            print 'Cancelling!'
            return

        if action == 'upload':
            _assets_upload(local_path, key)
        elif action == 'delete':
            _assets_delete(local_path, key)

import json

from google.cloud import storage

from config import OUTPUT_BUCKET
from utils import get_bucket, get_blob
from process_data import normalize_data

client = storage.Client()
output_bucket = get_bucket(client, OUTPUT_BUCKET)


def transform_data(data, context):
    """Transform data."""
    bucket = client.get_bucket(data['bucket'])
    blob_name = data['name']
    blob = bucket.get_blob(blob_name)

    # Use a distinct name for the downloaded bytes so we don't shadow the
    # `data` event payload parameter.
    raw = blob.download_as_string()
    new_data = normalize_data(raw)

    output_blob = get_blob(output_bucket, blob_name)
    output_blob.upload_from_string(json.dumps(new_data))

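# transform_data is shaped like a GCS-triggered Cloud Function: `data` is the
# storage event payload and `context` the event metadata. A minimal local
# smoke test (hypothetical bucket and object names):
if __name__ == "__main__":
    transform_data({"bucket": "input-bucket", "name": "records/latest.json"},
                   None)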