Example #1
def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    s3 = utils.get_bucket(app_config.ASSETS_S3_BUCKET)

    return s3
Example #2
def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    s3 = boto.connect_s3()

    return utils.get_bucket(app_config.ASSETS_S3_BUCKET['bucket_name'])
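Examples #1 and #2 both delegate to a utils.get_bucket helper whose implementation is not part of this listing. A minimal hypothetical sketch of what such a helper might look like on top of boto, assuming the config value is either a bare bucket name or a dict with a 'bucket_name' key:
# Hypothetical sketch only; the real utils.get_bucket is not shown above.
import boto


def get_bucket(bucket_config):
    """
    Return a boto Bucket for either a bucket-name string or a config dict
    with a 'bucket_name' key.
    """
    if isinstance(bucket_config, dict):
        bucket_name = bucket_config['bucket_name']
    else:
        bucket_name = bucket_config

    s3 = boto.connect_s3()

    return s3.get_bucket(bucket_name)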
Example #3
def update_downloads():
    require('settings', provided_by=['production', 'staging'])

    with open('data/songs.csv') as f:
        rows = csv.DictReader(f)

        for row in rows:
            if not row['download_url']:
                print 'Missing download url'
                continue

            filename = row['download_url'].split('/')[-1]

            print filename

            download_request = requests.get(row['download_url'], stream=True)

            with open('downloads/%s' % filename, 'wb') as out:
                for chunk in download_request.iter_content(chunk_size=1024):
                    if chunk:
                        out.write(chunk)
                        out.flush()

            bucket = utils.get_bucket(app_config.S3_BUCKET)

            deploy_file(
                bucket,
                'downloads/%s' % filename,
                '%s/downloads/%s' % (app_config.PROJECT_SLUG, filename),
                headers={
                    'Cache-Control': 'max-age=%i' % app_config.ASSETS_MAX_AGE,
                    'Content-Disposition': 'attachment; filename="%s"' % filename
                }
            )
Example #4
def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    s3 = utils.get_bucket(app_config.ASSETS_S3_BUCKET)

    return s3
Example #5
def interactive_comparison():
    """Compare two sentences separated by a semi-colon"""
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("(1) > ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        contexts = []
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)
            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)

            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)

            # Get the output context vector
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)

            # Append the context so we can compute the dot product
            contexts.append(output_context)

            # Display the output
            print("bucket_id: ", bucket_id)
            print("output_context", output_context)

            # Now we compute similarity metrics
            if len(contexts) == 2:
                cosine_distance = cosine_similarity(*contexts)
                euclid_distance = np.linalg.norm(contexts[1] - contexts[0])
                print('cosine_similarity', cosine_distance)
                print('euclid_distance', euclid_distance)
                print('-------------------------------')
                contexts = []

            # Start again
            next_sentence = len(contexts) + 1
            print("(%i) > " % next_sentence, end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
Example #6
def get_image_exists(imageHash):
    # check if screenshot already exists
    bucket = get_bucket(BUCKET)
    for blob in bucket.list_blobs():
        if imageHash == blob.name:
            logger.info(f"Image {imageHash} already exists")
            return blob.public_url

    return ''
Example #7
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        file_headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(output.getvalue(),
                                       file_headers,
                                       policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(src,
                                         file_headers,
                                         policy='public-read')
Example #8
def deploy_file(src, dst, max_age):
    """
    Deploy a single file to S3, if the local version is different.

    If warn_threshold is a positive integer N, we warn if the file is
    larger than N bytes.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    headers = {
        'Content-Type': mimetypes.guess_type(src)[0],
        'Cache-Control': 'max-age=%i' % max_age
    }

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(output.getvalue(), headers, policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(src, headers, policy='public-read')
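The deploy_file variant in Example #8 compares a locally computed MD5 against the S3 key's ETag before uploading. A brief hypothetical usage; the paths and max-age value below are illustrative only:
# Hypothetical call; src/dst paths and max_age are made-up examples.
deploy_file('www/index.html', 'my-project/index.html', max_age=300)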
Example #9
def delete_folder(dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    for key in bucket.list(prefix='%s/' % dst):
        print 'Deleting %s' % (key.key)

        key.delete()
Example #10
def delete_folder(bucket_name, dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(bucket_name)

    for key in bucket.list(prefix='%s/' % dst):
        print 'Deleting %s' % (key.key)

        key.delete()
Example #11
def delete_folder(dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET["bucket_name"])

    for key in bucket.list(prefix="%s/" % dst):
        print "Deleting %s" % (key.key)

        key.delete()
Example #12
def check_timestamp():
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = '%s/live-data/timestamp.json' % app_config.PROJECT_SLUG
    if k.exists():
        return True
    else:
        return False
Example #13
def delete_folder(bucket_name, dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(bucket_name)

    for key in bucket.list(prefix='%s/' % dst):
        logger.info('Deleting %s' % (key.key))

        key.delete()
Example #14
def check_timestamp():
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = '%s/live-data/timestamp.json' % app_config.PROJECT_SLUG
    if k.exists():
        return True
    else:
        return False
Example #15
def delete_folder(dst):
    """
    Delete a folder from S3.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    for key in bucket.list(prefix='%s/' % dst):
        print 'Deleting %s' % (key.key)

        key.delete()
Example #16
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]

    # Gzip file
    if os.path.splitext(src)[1].lower() in GZIP_FILE_TYPES:
        file_headers['Content-Encoding'] = 'gzip'

        with open(src, 'rb') as f_in:
            contents = f_in.read()

        output = StringIO()
        f_out = gzip.GzipFile(filename=dst, mode='wb', fileobj=output)
        f_out.write(contents)
        f_out.close()

        local_md5 = hashlib.md5()
        local_md5.update(output.getvalue())
        local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s (gzipped)' % (src, dst)
            k.set_contents_from_string(
                output.getvalue(), file_headers, policy='public-read')
    # Non-gzip file
    else:
        with open(src, 'rb') as f:
            local_md5 = hashlib.md5()
            local_md5.update(f.read())
            local_md5 = local_md5.hexdigest()

        if local_md5 == s3_md5:
            print 'Skipping %s (has not changed)' % src
        else:
            print 'Uploading %s --> %s' % (src, dst)
            k.set_contents_from_filename(
                src, file_headers, policy='public-read')
Example #17
def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            # Get token-ids for the input sentence.
            token_ids = data_utils.sentence_to_token_ids(
                tf.compat.as_bytes(sentence), en_vocab)
            print("tokenids:", token_ids)
            # Which bucket does it belong to?
            bucket_id = get_bucket(en_vocab, sentence)

            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(token_ids, [])]}, bucket_id)
            # Get output logits for the sentence.
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)
            # Get the output context vector
            output_context = model.step_context(sess, encoder_inputs,
                                                decoder_inputs, target_weights,
                                                bucket_id)
            # Display the output
            print("bucket_id: ", bucket_id)

            print("output_context", output_context)
            # This is a greedy decoder - outputs are just argmaxes of output_logits.
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]
            # If there is an EOS symbol in outputs, cut them at that point.
            if data_utils.EOS_ID in outputs:
                outputs = outputs[:outputs.index(data_utils.EOS_ID)]
            # Print out French sentence corresponding to outputs.
            print(" ".join([
                tf.compat.as_str(rev_fr_vocab[output]) for output in outputs
            ]))
            print("> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
Example #18
def check_timestamp():
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = '%s%s/live-data/timestamp.json' % (
        app_config.LIVEBLOG_DIRECTORY_PREFIX, app_config.CURRENT_LIVEBLOG)
    if k.exists():
        return True
    else:
        return False
Example #19
def get(path):
    bucket = get_bucket(
        {
            "time": "time",
            "loom": "loom",
            "react": "react",
            "ok-help": "ok-help",
            "wiki": "wiki",
        },
        "react-pr153",
    )
    return serve_path(bucket, "/", path)
Example #20
def encode():
    """Encode all of the sentences to vector form"""
    train, dev, test = loader.getData()
    sentences = []
    tokens = []

    # Load the vocab
    en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)

    # Collect all the training sentences
    for i, row in pd.concat((train, test)).iterrows():
        if isinstance(row["sentence1"], basestring) and isinstance(
                row["sentence2"], basestring):
            sentences.append(row["sentence1"])
            sentences.append(row["sentence2"])

    # Allocate the sentences to buckets
    bucketed = {}
    for sentence in sentences:
        bucket_id = get_bucket(en_vocab, sentence)
        bucketed.setdefault(bucket_id, [])
        bucketed[bucket_id].append(sentence)

    mapped = {}
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = BATCH_SIZE  # We decode 64 sentences at a time.
        # Iterate over each bucket
        for bucket_id, sentences in bucketed.iteritems():
            for batch in chunker(sentences, BATCH_SIZE):
                data = []
                for sentence in batch:
                    token_ids = data_utils.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    expected_output = []
                    data.append((token_ids, expected_output))
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: data}, bucket_id)
                contexts = model.step_context(sess, encoder_inputs,
                                              decoder_inputs, target_weights,
                                              bucket_id)
                features = np.hstack(contexts)
                print 'Extracted another set of features with shape:', features.shape
                # Now we align sentences with their contexts
                for i, sentence in enumerate(batch):
                    mapped[sentence] = features[i, :].tolist()
    print sentence
    print mapped[sentence]
    print "Saving sentences to %s" % JSON_NAME
    with open(JSON_NAME, 'w') as file:
        json.dump(mapped, file)
Example #21
def check_timestamp():
    """
    Check if a timestamp file exists.
    """
    require('settings', provided_by=[production, staging])

    bucket = utils.get_bucket(app_config.S3_BUCKET)
    k = Key(bucket)
    k.key = 'live-data/timestamp.json'
    if k.exists():
        return True
    else:
        return False
Example #22
def get(path):
    bucket = get_bucket(
        {
            "time": "time",
            "loom": "loom",
            "react": "react",
            "ok-help": "ok-help",
            "wiki": "wiki",
            "docs": "docs",
            "cs170-website": "cs170-website",
            "cs170": "cs170-website",
            # simple default app for PR testing
            "static-server": "time",
        },
        "time",
    )
    return serve_path(bucket, "/", path)
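The get_bucket(mapping, default) helper in Examples #19 and #22 is also not shown in this listing. A hypothetical sketch, assuming a Flask-style request context and Google Cloud Storage (neither is confirmed by the examples), where the bucket is chosen by matching the request's subdomain against the mapping and falling back to the default:
# Hypothetical sketch only; assumes Flask and google-cloud-storage.
from flask import request
from google.cloud import storage

client = storage.Client()


def get_bucket(mapping, default):
    # e.g. "react.example.org" -> "react"
    subdomain = request.host.split('.')[0]
    bucket_name = mapping.get(subdomain, default)
    return client.bucket(bucket_name)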
Example #23
def upload_to_bucket(imageContent, imageHash):

    try:
        bucket = get_bucket(BUCKET)
        logger.info("new image. Uploading screenshot")
        blob = bucket.blob(imageHash)
        blob.upload_from_file(BytesIO(base64.b64decode(
            imageContent)), content_type="image/png")

        # make public and return url
        blob.make_public()
    except Exception as e:
        raise RuntimeError(
            f"Problems while uploading screenshot. {str(e)}")

    logger.info(f"Screenshot at {blob.public_url}")
    return blob.public_url
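A caller might combine get_image_exists from Example #6 with upload_to_bucket above to avoid re-uploading a screenshot that is already stored. A small sketch; the wrapper name is hypothetical:
def get_or_upload_screenshot(image_content, image_hash):
    # Reuse the existing public URL if this hash is already in the bucket,
    # otherwise upload the screenshot and return its new URL.
    existing_url = get_image_exists(image_hash)
    if existing_url:
        return existing_url
    return upload_to_bucket(image_content, image_hash)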
Example #24
def get_sentence_to_context_map(sentences):
    """
  Process all of the sentences with the model
  Return a map between sentence text and the context vectors
  The order of the map is undefined due to the bucketing process
  """
    # Load the vocab
    en_vocab = get_english_vocab(DATA_DIR, VOCAB_SIZE)

    # Allocate the sentences to buckets
    bucketed = {}
    for sentence in sentences:
        bucket_id = get_bucket(en_vocab, sentence)
        bucketed.setdefault(bucket_id, [])
        bucketed[bucket_id].append(sentence)

    mapped = {}
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True, train_dir=TRAIN_DIR)
        model.batch_size = BATCH_SIZE  # We decode 64 sentences at a time.
        # Iterate over each bucket
        for bucket_id, sentences in bucketed.iteritems():
            for batch in chunker(sentences, BATCH_SIZE):
                data = []
                # Tokenize each sentence
                for sentence in batch:
                    token_ids = data_utils.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    expected_output = []
                    data.append((token_ids, expected_output))
                # Use the model to obtain contexts for each sentence in the batch
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: data}, bucket_id)
                contexts = model.step_context(sess, encoder_inputs,
                                              decoder_inputs, target_weights,
                                              bucket_id)
                features = np.hstack(contexts)
                print 'Encoded {0} sentences into {1} dimensional vectors'.format(
                    *features.shape)
                # Now we align sentences with their contexts
                for i, sentence in enumerate(batch):
                    mapped[sentence] = features[i, :].tolist()
    return mapped
Example #25
def deploy_folder(bucket_name, src, dst, headers={}, ignore=[]):
    """
    Deploy a folder to S3, checking each file to see if it has changed.
    """
    to_deploy = []

    for local_path, subdirs, filenames in os.walk(src, topdown=True):
        rel_path = os.path.relpath(local_path, src)

        for name in filenames:
            if name.startswith('.'):
                continue

            src_path = os.path.join(local_path, name)

            skip = False

            for pattern in ignore:
                if fnmatch(src_path, pattern):
                    skip = True
                    break

            if skip:
                continue

            if rel_path == '.':
                dst_path = os.path.join(dst, name)
            else:
                dst_path = os.path.join(dst, rel_path, name)

            to_deploy.append((src_path, dst_path))

    if bucket_name == app_config.STAGING_S3_BUCKET:
        public = False
    else:
        public = True
    bucket = utils.get_bucket(bucket_name)
    logger.info(dst)
    for src, dst in to_deploy:
        deploy_file(bucket, src, dst, headers, public=public)
Example #26
def deploy_folder(bucket_name, src, dst, headers={}, ignore=[]):
    """
    Deploy a folder to S3, checking each file to see if it has changed.
    """
    to_deploy = []

    for local_path, subdirs, filenames in os.walk(src, topdown=True):
        rel_path = os.path.relpath(local_path, src)

        for name in filenames:
            if name.startswith('.'):
                continue

            src_path = os.path.join(local_path, name)

            skip = False

            for pattern in ignore:
                if fnmatch(src_path, pattern):
                    skip = True
                    break

            if skip:
                continue

            if rel_path == '.':
                dst_path = os.path.join(dst, name)
            else:
                dst_path = os.path.join(dst, rel_path, name)

            to_deploy.append((src_path, dst_path))

    if bucket_name == app_config.STAGING_S3_BUCKET:
        public = False
    else:
        public = True
    bucket = utils.get_bucket(bucket_name)
    logger.info(dst)
    for src, dst in to_deploy:
        deploy_file(bucket, src, dst, headers, public=public)
Example #27
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if app_config.S3_BUCKET == app_config.STAGING_S3_BUCKET:
        policy = 'private'
    else:
        policy = 'public-read'

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]
        if file_headers['Content-Type'] == 'text/html':
            # Force character encoding header
            file_headers['Content-Type'] = '; '.join([
                file_headers['Content-Type'],
                'charset=utf-8'])
            
    with open(src, 'rb') as f:
        local_md5 = hashlib.md5()
        local_md5.update(f.read())
        local_md5 = local_md5.hexdigest()

    if local_md5 == s3_md5:
        print 'Skipping %s (has not changed)' % src
    else:
        print 'Uploading %s --> %s' % (src, dst)
        k.set_contents_from_filename(src, file_headers, policy=policy)
Example #28
def rm(path):
    """
    Remove an asset from s3 and locally
    """
    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)

    file_list = glob(path)

    found_folder = True

    # Add files in folders, instead of folders themselves (S3 doesn't have folders)
    while found_folder:
        found_folder = False

        for local_path in file_list:
            if os.path.isdir(local_path):
                found_folder = True

                file_list.remove(local_path)

                for path in os.listdir(local_path):
                    file_list.append(os.path.join(local_path, path))

    if len(file_list) > 0:
        utils.confirm("You are about to destroy %i files. Are you sure?" %
                      len(file_list))

        for local_path in file_list:
            logger.info(local_path)

            if os.path.isdir(local_path):
                file_list.extend(os.listdir(local_path))

                continue

            key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG,
                                          1)
            key = bucket.get_key(key_name)

            _assets_delete(local_path, key)
Example #29
def _check_slug(slug):
    """
    Does slug exist in graphics folder or production s3 bucket?
    """
    graphic_path = '%s/%s' % (app_config.GRAPHICS_PATH, slug)
    if os.path.isdir(graphic_path):
        print 'Error: Directory already exists'
        return True

    try:
        bucket = utils.get_bucket(app_config.PRODUCTION_S3_BUCKET['bucket_name'])
        key = bucket.get_key('%s/graphics/%s/child.html' % (app_config.PROJECT_SLUG, slug))

        if key:
            print 'Error: Slug exists on apps.npr.org'
            return True
    except boto.exception.NoAuthHandlerFound:
        print 'Could not authenticate, skipping Amazon S3 check'
    except boto.exception.S3ResponseError:
        print 'Could not access S3 bucket, skipping Amazon S3 check'

    return False
Example #30
def _check_slug(slug):
    """
    Does slug exist in graphics folder or production s3 bucket?
    """
    graphic_path = '%s/%s' % (app_config.GRAPHICS_PATH, slug)
    if os.path.isdir(graphic_path):
        print 'Error: Directory already exists'
        return True

    try:
        bucket = utils.get_bucket(app_config.PRODUCTION_S3_BUCKET['bucket_name'])
        key = bucket.get_key('%s/graphics/%s/child.html' % (app_config.PROJECT_SLUG, slug))

        if key:
            print 'Error: Slug exists on apps.npr.org'
            return True
    except boto.exception.NoAuthHandlerFound:
        print 'Could not authenticate, skipping Amazon S3 check'
    except boto.exception.S3ResponseError:
        print 'Could not access S3 bucket, skipping Amazon S3 check'

    return False
Example #31
def deploy_file(src, dst, headers={}):
    """
    Deploy a single file to S3, if the local version is different.
    """
    bucket = utils.get_bucket(app_config.S3_BUCKET['bucket_name'])

    k = bucket.get_key(dst)
    s3_md5 = None

    if k:
        s3_md5 = k.etag.strip('"')
    else:
        k = Key(bucket)
        k.key = dst

    file_headers = copy.copy(headers)

    if app_config.S3_BUCKET == app_config.STAGING_S3_BUCKET:
        policy = 'private'
    else:
        policy = 'public-read'

    if 'Content-Type' not in headers:
        file_headers['Content-Type'] = mimetypes.guess_type(src)[0]
        if file_headers['Content-Type'] == 'text/html':
            # Force character encoding header
            file_headers['Content-Type'] = '; '.join(
                [file_headers['Content-Type'], 'charset=utf-8'])

    with open(src, 'rb') as f:
        local_md5 = hashlib.md5()
        local_md5.update(f.read())
        local_md5 = local_md5.hexdigest()

    if local_md5 == s3_md5:
        print 'Skipping %s (has not changed)' % src
    else:
        print 'Uploading %s --> %s' % (src, dst)
        k.set_contents_from_filename(src, file_headers, policy=policy)
Example #32
def _assets_get_bucket():
    """
    Get a reference to the assets bucket.
    """
    return utils.get_bucket(app_config.ASSETS_S3_BUCKET['bucket_name'])
Example #33
            assert not np.any(np.isnan(imgs))
            img_batches = minibatch(imgs, 32, 1000)
            label_batches = minibatch(gazes, 32, 1000)

            for images, labels in zip(img_batches, label_batches):

                # Calculate batch loss and predicted locations; fetch into a
                # new name so the `locs` tensor is not overwritten and can be
                # fetched again on the next minibatch
                loss, pred_locs = sess.run([cost, locs],
                                           feed_dict={
                                               x: images,
                                               y: labels,
                                               keep_prob: 1.
                                           })
                acc = np.sum(
                    np.array([
                        get_bucket(4, expected[0], expected[1], 244, 244)
                        == get_bucket(4, actual[0], actual[1], 244, 244)
                        for expected, actual in zip(labels, pred_locs)
                    ])) / len(pred_locs)
                avg_acc += acc
                avg_loss += loss
                nums += 1
        avg_acc /= nums
        avg_loss /= nums
        print("Epoch " + str(epoch) + ", Minibatch Loss= " + \
                "{:.6f}".format(avg_loss) + ", Training Accuracy= " + \
                "{:.5f}".format(avg_acc))

    # Save model
    save_path = saver.save(sess, "loc_model_mse.ckpt")
    print("Model saved in file: %s" % save_path)
Example #34
def sync():
    """
    Intelligently synchronize assets between S3 and local folder.
    """
    ignore_globs = []

    with open('%s/assetsignore' % ASSETS_ROOT, 'r') as f:
        ignore_globs = [l.strip() for l in f]

    local_paths = []
    not_lowercase = []

    for local_path, subdirs, filenames in os.walk(ASSETS_ROOT):
        for name in filenames:
            full_path = os.path.join(local_path, name)
            glob_path = full_path.split(ASSETS_ROOT)[1].strip('/')

            ignore = False

            for ignore_glob in ignore_globs:
                if fnmatch(glob_path, ignore_glob):
                    ignore = True
                    break

            if ignore:
                logger.info('Ignoring: %s' % full_path)
                continue

            if name.lower() != name:
                not_lowercase.append(full_path)

            local_paths.append(full_path)

    # Prevent case sensitivity differences between OSX and S3 from screwing us up
    if not_lowercase:
        logger.error(
            'The following filenames are not lowercase, please change them before running `assets.sync`:'
        )

        for name in not_lowercase:
            logger.error(name)

        return

    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)
    keys = bucket.list('%s/' % app_config.ASSETS_SLUG)

    which = None
    always = False

    for key in keys:
        download = False
        upload = False

        local_path = key.name.replace(app_config.ASSETS_SLUG, ASSETS_ROOT, 1)

        # Skip root key
        if local_path == '%s/' % ASSETS_ROOT:
            continue

        logger.info(local_path)

        if local_path in local_paths:
            # A file can only exist once, this speeds up future checks
            # and provides a list of non-existing files when complete
            local_paths.remove(local_path)

            # We need an actual key, not a "list key"
            # http://stackoverflow.com/a/18981298/24608
            key = bucket.get_key(key.name)

            with open(local_path, 'rb') as f:
                local_md5 = key.compute_md5(f)[0]

            # Hashes are different
            if key.get_metadata('md5') != local_md5:
                if not always:
                    # Ask user which file to take
                    which, always = _assets_confirm(local_path)

                if not which:
                    logger.info('Cancelling!')

                    return

                if which == 'remote':
                    download = True
                elif which == 'local':
                    upload = True
        else:
            download = True

        if download:
            _assets_download(key, local_path)

        if upload:
            _assets_upload(local_path, key)

    action = None
    always = False

    # Iterate over files that didn't exist on S3
    for local_path in local_paths:
        key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG, 1)
        key = bucket.get_key(key_name, validate=False)

        logger.info(local_path)

        if not always:
            action, always = _assets_upload_confirm()

        if not action:
            logger.info('Cancelling!')

            return

        if action == 'upload':
            _assets_upload(local_path, key)
        elif action == 'delete':
            _assets_delete(local_path, key)
Example #35
    tweet1 = api.update_status(status=tweet1_dict["tweet_text"] +
                               "\n\nWho is dying:		   Who is vaccinated:",
                               media_ids=media_ids)

    # second tweet
    gif_id = api.media_upload(tweet2_dict["gif_path"]).media_id_string
    api.create_media_metadata(media_id=gif_id,
                              alt_text=tweet2_dict["alt_text"])
    tweet2 = api.update_status(
        in_reply_to_status_id=tweet1.id,
        status=tweet1_dict["tweet_text"],
        media_ids=[gif_id],
    )

    # third tweet
    tweet3_status = '''
        Read the latest on Chicago's widening vaccine disparity from @maerunes for @SouthSideWeekly: https://southsideweekly.com/chicagos-vaccine-disparity-widens/
    '''
    tweet3 = api.update_status(
        in_reply_to_status_id=tweet2.id,
        status=tweet3_status,
    )
    # upload latest files to Google Cloud for embeds
    bucket = get_bucket("chivaxbot", GOOGLE_APPLICATION_CREDENTIALS)
    gcloud_uploads = [
        "deaths_map_path_latest", "vax_map_path_latest", "sentence_path_latest"
    ]

    for path in gcloud_uploads:
        upload_to_gcloud(bucket, tweet1_dict[path])
Example #36
def sync():
    """
    Intelligently synchronize assets between S3 and local folder.
    """
    ignore_globs = []

    with open('%s/assetsignore' % ASSETS_ROOT, 'r') as f:
        ignore_globs = [l.strip() for l in f]

    local_paths = []
    not_lowercase = []

    for local_path, subdirs, filenames in os.walk(ASSETS_ROOT):
        for name in filenames:
            full_path = os.path.join(local_path, name)
            glob_path = full_path.split(ASSETS_ROOT)[1].strip('/')

            ignore = False

            for ignore_glob in ignore_globs:
                if fnmatch(glob_path, ignore_glob):
                    ignore = True
                    break

            if ignore:
                print 'Ignoring: %s' % full_path
                continue

            if name.lower() != name:
                not_lowercase.append(full_path)

            local_paths.append(full_path)

    # Prevent case sensitivity differences between OSX and S3 from screwing us up
    if not_lowercase:
        print 'The following filenames are not lowercase, please change them before running `assets.sync`:'

        for name in not_lowercase:
            print '    %s' % name

        return

    bucket = utils.get_bucket(app_config.ASSETS_S3_BUCKET)
    keys = bucket.list(app_config.ASSETS_SLUG)

    which = None
    always = False

    for key in keys:
        download = False
        upload = False

        local_path = key.name.replace(app_config.ASSETS_SLUG, ASSETS_ROOT, 1)

        # Skip root key
        if local_path == '%s/' % ASSETS_ROOT:
            continue

        print local_path

        if local_path in local_paths:
            # A file can only exist once, this speeds up future checks
            # and provides a list of non-existing files when complete
            local_paths.remove(local_path)

            # We need an actual key, not a "list key"
            # http://stackoverflow.com/a/18981298/24608
            key = bucket.get_key(key.name)

            with open(local_path, 'rb') as f:
                local_md5 = key.compute_md5(f)[0]

            # Hashes are different
            if key.get_metadata('md5') != local_md5:
                if not always:
                    # Ask user which file to take
                    which, always = _assets_confirm(local_path)

                if not which:
                    print 'Cancelling!'

                    return

                if which == 'remote':
                    download = True
                elif which == 'local':
                    upload = True
        else:
            download = True

        if download:
            _assets_download(key, local_path)

        if upload:
            _assets_upload(local_path, key)

    action = None
    always = False

    # Iterate over files that didn't exist on S3
    for local_path in local_paths:
        key_name = local_path.replace(ASSETS_ROOT, app_config.ASSETS_SLUG, 1)
        key = bucket.get_key(key_name, validate=False)

        print local_path

        if not always:
            action, always = _assets_upload_confirm()

        if not action:
            print 'Cancelling!'

            return

        if action == 'upload':
            _assets_upload(local_path, key)
        elif action == 'delete':
            _assets_delete(local_path, key)
Example #37
import json

from google.cloud import storage

from config import OUTPUT_BUCKET
from utils import get_bucket, get_blob
from process_data import normalize_data

client = storage.Client()
output_bucket = get_bucket(client, OUTPUT_BUCKET)


def transform_data(data, context):
    """Transform data."""

    bucket = client.get_bucket(data['bucket'])
    blob_name = data['name']
    blob = bucket.get_blob(blob_name)

    data = blob.download_as_string()
    new_data = normalize_data(data)

    output_blob = get_blob(output_bucket, blob_name)
    output_blob.upload_from_string(json.dumps(new_data))
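Example #37 imports get_bucket and get_blob from a local utils module that is not included in this listing. Minimal hypothetical sketches of what those helpers might look like with the google.cloud.storage client:
# Hypothetical sketches only; the real utils module is not shown above.
def get_bucket(client, bucket_name):
    # Return a Bucket handle; client.bucket() does not make an API call.
    return client.bucket(bucket_name)


def get_blob(bucket, blob_name):
    # Return a Blob handle inside the given bucket.
    return bucket.blob(blob_name)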