def get_flo(self):
        """Fetch the S3 object at self.bucket_name/self.key_name and return
        its body wrapped in a BytesIO.

        If the first get fails (S3 event notifications URL-encode key names),
        logs the error and retries once with an unquoted key. Raises
        SigVerifyTooBig when the object exceeds MAX_EXE_SIZE; any failure of
        the retry propagates to the caller.
        """
        s3_client = boto3.client('s3')
        debug("in get_flo")
        try:
            result = s3_client.get_object(Bucket=self.bucket_name,
                                          Key=self.key_name)
        except Exception as e:
            text = repr(e)[:256]
            self.add_error("failed to process s3 object {}/{} '{}'".format(
                self.bucket_name, self.key_name, text))
            # issue #14 - the decode majik is from AWS sample code.
            # FIX: `urllib.unquote_plus` exists only on Python 2 (moved to
            # urllib.parse on Python 3), and py3's unquote_plus requires str,
            # not utf-8 bytes -- import the right name and pass the key as-is.
            try:
                from urllib.parse import unquote_plus  # Python 3
            except ImportError:
                from urllib import unquote_plus  # Python 2
            new_key = unquote_plus(self.key_name)
            self.add_message("First get failed ({}), trying to unquote"
                             " ({})".format(self.key_name, new_key))
            result = s3_client.get_object(Bucket=self.bucket_name, Key=new_key)
            self.add_message("get_object worked after unescaping")

        debug("after s3_client.get_object() result={}".format(type(result)))
        if result['ContentLength'] > MAX_EXE_SIZE:
            msg = """Too big: {}/{} {}
                    ({})""".format(self.bucket_name, self.key_name,
                                   result['ContentLength'], repr(result))
            print(msg)
            raise SigVerifyTooBig(msg)
        debug("before body read")
        flo = BytesIO(result['Body'].read())
        debug("after read() flo={}".format(type(flo)))
        return flo
Example #2
0
def lambda_handler(event, context):
    """Turn an RSS feed's articles into MP3s via Polly and publish a podcast.

    event keys:
        rss    -- URL of the source RSS feed
        bucket -- name of the S3 bucket that holds the MP3s and podcast.xml

    Entries already present in the bucket are skipped; Polly errors are
    logged and do not abort the run. Finally writes 'podcast.xml' with the
    regenerated feed.
    """
    rss = event['rss']
    bucket_name = event['bucket']
    logging.info("Processing url: %s", rss)
    logging.info("Using bucket: %s", bucket_name)

    polly = boto3.client("polly")
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)

    logging.info("getting list of existing objects in the given bucket")
    # set for O(1) "already converted?" checks below
    files = set(o.key for o in bucket.objects.all())

    feed = feedparser.parse(rss)

    title = feed['feed']['title']
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title('Audio podcast based on: %s' % title)
    fg.link(href=feed.feed.link, rel='alternate')
    fg.subtitle(feed.feed.description)

    # BUG FIX: the template was missing the {filename} placeholder, so every
    # enclosure URL pointed at the bucket root instead of the entry's MP3
    # (the .format() call below already supplies filename=).
    ENTRY_URL = "http://s3-{region}.amazonaws.com/{bucket}/{filename}"

    for entry in get_entries(feed):
        filename = "%s.mp3" % entry['id']
        fe = fg.add_entry()
        fe.id(entry['id'])
        fe.title(entry['title'])
        fe.published(entry['published'])
        entry_url = ENTRY_URL.format(
            bucket=bucket_name,
            filename=filename,
            region=os.environ["AWS_REGION_BUCKET"]
        )
        fe.enclosure(entry_url, 0, 'audio/mpeg')
        if filename in files:
            logging.info('Article "%s" with id %s already exist, skipping.'
                         % (entry['title'], entry['id']))
            continue
        try:
            logging.info("Next entry, size: %d" % len(entry['content']))
            logging.debug("Content: %s" % entry['content'])
            response = polly.synthesize_speech(
                Text=entry['content'],
                OutputFormat="mp3",
                VoiceId="Joanna"
            )
            # closing() guarantees the streaming body is released even if
            # put_object raises
            with closing(response["AudioStream"]) as stream:
                bucket.put_object(Key=filename, Body=stream.read())
        except BotoCoreError as error:
            # best-effort: a single failed article must not kill the run
            logging.error(error)
    bucket.put_object(Key='podcast.xml', Body=fg.rss_str(pretty=True))
    def send_sns(self, msg, e=None, reraise=False):
        """Publish *msg* to the SNS topic named by the SNSARN env var.

        The first line of *msg* becomes the SNS subject (AWS caps subjects at
        under 100 chars); bucket/key context is appended to the body, and if
        *e* is given the current traceback is appended as well. A publish
        failure is swallowed into add_message() so the lambda keeps running.

        Raises KeyError when SNSARN is missing from the environment.
        """
        # use first line of incoming msg as subject, but AWS limit is under 100
        # chars.
        # ASSUME anything over is due to long s3 URL and use heuristic
        subject = msg.split('\n')[0]
        if len(subject) >= 100:
            # split assuming URL, then retain result (index 0) and file name
            # (index -1). File name should be sufficient to allow page
            # recipient to decide urgency of further investigation.
            pieces = subject.split('/')
            subject = "{} ... {}".format(pieces[0], pieces[-1])
            if len(subject) >= 100:
                # don't try to be smarter, full text is still in 'msg'
                subject = "Truncated subject, examine message"

        # append bucket & key, short key first
        msg += "\n{}\nkey={}\nbucket={}".format(
            os.path.basename(self.key_name), self.key_name, self.bucket_name)
        # hack to get traceback in email
        if e:
            import traceback
            # FIX: separate the traceback from the preceding line
            msg += "\n" + traceback.format_exc()
        client = boto3.client("sns")
        # keep a global to prevent infinite recursion on arn error
        global topic_arn
        topic_arn = os.environ.get('SNSARN', "")
        if self.verbose:
            print("snsarn: {}".format(topic_arn))
        if not topic_arn:
            # bad config, we expected this in the environ
            # set flag so we don't re-raise
            topic_arn = "no-topic-arn"
            raise KeyError("Missing 'SNSARN' from environment")
        try:
            # if the publish fails, we still want to continue, so we get the
            # details into the cloud watch logs. Otherwise, this can
            # (sometimes) terminate the lambda causing retries & DLQ
            response = client.publish(Message=msg,
                                      Subject=subject,
                                      TopicArn=topic_arn)
            debug("sns publish: '{}'".format(response))
        except Exception as publish_error:
            # FIX: renamed from `e` -- the handler target shadowed (and, on
            # Python 3, unbound) the `e` parameter after the block.
            self.add_message("sns publish failed\n"
                             "   msg ({}): '{}'\n"
                             "  subj ({}): '{}'\n"
                             "exception: '{}'"
                             "".format(len(msg), str(msg), len(subject),
                                       str(subject), str(publish_error)))
 def send_sns(self, msg, e=None, reraise=False):
     """Send *msg* to the SNS topic named by the SNSARN environment variable.

     If *e* is provided, the current traceback is appended to the message
     so it shows up in the notification. Raises KeyError when SNSARN is
     not set.
     """
     if e:
         # append the traceback so the email carries the failure context
         import traceback
         msg = msg + traceback.format_exc()
     sns = boto3.client("sns")
     # module-level name so a bad-ARN failure can't recurse forever
     global topic_arn
     topic_arn = os.environ.get('SNSARN', "")
     if self.verbose:
         print("snsarn: {}".format(topic_arn))
     if topic_arn:
         # publish result is intentionally discarded
         sns.publish(Message=msg, TopicArn=topic_arn)
     else:
         # misconfiguration -- mark it so we don't re-raise on recursion
         topic_arn = "no-topic-arn"
         raise KeyError("Missing 'SNSARN' from environment")
    def get_flo(self):
        """Return the S3 object's body as a BytesIO, waiting for it to exist.

        Always records the elapsed S3 time in self.s3_wait_time. On any S3
        failure, sets self.had_s3_error, logs the error, and re-raises.
        Raises SigVerifyTooBig when the object exceeds MAX_EXE_SIZE.
        """
        client = boto3.client('s3')
        debug("in get_flo")
        started = time.time()
        try:
            # Make sure the object is really available taken from
            #   https://blog.rackspace.com/the-devnull-s3-bucket-hacking-with-aws-lambda-and-python
            # Don't use defaults, though -- that's 100 sec during testing!
            client.get_waiter('object_exists').wait(
                Bucket=self.bucket_name,
                Key=self.key_name,
                WaiterConfig={'Delay': 3, 'MaxAttempts': 3})
            result = client.get_object(Bucket=self.bucket_name,
                                       Key=self.key_name)
        except Exception as err:
            debug("s3 exceptions type: {}".format(type(err)))
            self.had_s3_error = True
            text = repr(err)[:256]
            self.add_error("failed to process s3 object {}/{} '{}'".format(
                self.bucket_name, self.key_name, text))
            raise
        finally:
            # record the wait whether or not the fetch succeeded
            self.s3_wait_time = time.time() - started

        debug("after s3_client.get_object() result={}".format(type(result)))
        if result['ContentLength'] > MAX_EXE_SIZE:
            msg = """Too big: {}/{} {}
                    ({})""".format(self.bucket_name, self.key_name,
                                   result['ContentLength'], repr(result))
            print(msg)
            raise SigVerifyTooBig(msg)
        debug("before body read")
        flo = BytesIO(result['Body'].read())
        debug("after read() flo={}".format(type(flo)))
        return flo
Example #6
0
def get_user():
    """Return the IAM user record as a JSON string.

    No tracing decorator is needed here: the boto3 call to IAM is already
    traced through the monkey-patching.
    """
    iam_client = boto3.client('iam')
    user = iam_client.get_user(UserName="******")
    return dumps(user, default=json_serial)