Example 1
 def add_recipe(self, recipe, reset=False):
     """
     Add a scheduled recipe to the list of scheduled recipes.
     """
     if not reset:
         msg = 'Adding {} recipe ({} / {}) at {}'\
             .format(recipe.schedule_by, recipe.id, recipe.slug, dates.now())
         self.log(msg)
     else:
         msg = 'Resetting {} recipe ({} / {}) at {}'\
             .format(recipe.schedule_by, recipe.id, recipe.slug, dates.now())
         self.log(msg)
     self._running_recipes['{}:reset'.format(recipe.id)] = reset
     self._running_recipes[recipe.id] = recipe
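
add_recipe stores two entries per recipe in the same dict: the recipe object under recipe.id and its reset flag under '<id>:reset'. A consumer would read the flag back along these lines (is_reset is a hypothetical helper, not part of the source):

    def is_reset(self, recipe):
        # hypothetical helper: read back the flag stored by add_recipe.
        return self._running_recipes.get('{}:reset'.format(recipe.id), False)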
Example 2
    def run(self):
        """
        Fetch homepage URLs, look up content item IDs, and set the number of minutes
        it's been on the homepage.
        """
        p = self.options.pop('page')
        for link in pageone.get(p, **self.options):
            u = link.get('url')

            # smartly handle urls
            u = url.prepare(u, canonicalize=False)
            if u and u not in self.url_lookup:
                u = url.prepare(u, canonicalize=True)

            # yield metrics
            if u and u in self.url_lookup:
                cids = self.url_lookup[u]
                for cid in cids:
                    yield {
                        'datetime': dates.now(),
                        'content_item_id': cid,
                        'metrics': {
                            'time_on_homepage': self.recipe.get('minutes', 60)
                        }
                    }
Example 3
    def run(self):
        """
        Extract an RSS feed and create articles.
        """
        feed_url = self.options['feed_url']
        domains = self.org.get('domains', [])
        entries = rss.get_entries(feed_url, domains)
        self.publish_dates = []

        # iterate through RSS entries.
        for article in entries:
            article['type'] = 'article'  # set this type as article.

            # since we poll often, we can assume this is a good
            # approximation of an article publish date.
            if not article.get('created'):
                article['created'] = dates.now()

            # if we haven't run, just yield all results.
            if not self.max_date_last_run:
                self.publish_dates.append(article['created'])
                yield article

            # only yield new articles
            elif article['created'] > self.max_date_last_run:
                self.publish_dates.append(article['created'])
                yield article
Example 4
def test_bulk_content_timeseries(nrows=10000):
    """
    Test bulk loading timeseries metrics
    """
    start = time.time()
    content_item_ids = [r['id'] for r in api.orgs.simple_content()]
    data = []
    for i in xrange(nrows):
        hours = nrows - i
        data.append({
            'content_item_id':
            choice(content_item_ids),
            'datetime':
            (dates.now() - timedelta(days=30, hours=hours)).isoformat(),
            'metrics': {
                'twitter_shares': i
            }
        })

    # make request and return status url
    res = api.content.bulk_create_timeseries(data)
    poll_status_url(res.get('status_url'))
    end = time.time()
    print "Bulk Loading {} Content Timeseries Metrics Took {} seconds"\
        .format(nrows, round((end-start), 2))
Example 5
    def cook_recipe(self):
        """
        Full pipeline.
        """
        # indicate that the recipe is running.
        self.recipe.last_run = dates.now()
        self.recipe.status = "running"
        db.session.add(self.recipe)
        db.session.commit()

        # generate a job id
        job_id = gen_uuid()

        # import the sous chef here to get the timeout
        # and raise import errors before it attempts to run
        # in the queue
        sc = import_sous_chef(self.sous_chef_path)

        # stash kwargs
        kw_key = self.stash_kw(job_id)

        # send it to the queue
        self.q.enqueue(run_sous_chef,
                       self.sous_chef_path,
                       self.recipe.id,
                       kw_key,
                       job_id=job_id,
                       timeout=sc.timeout,
                       result_ttl=self.kw_ttl)

        # return the job id
        return job_id
Example 6
    def run(self):
        """
        Extract an RSS feed and create articles.
        """
        feed_url = self.options['feed_url']
        feed_domain = url.get_simple_domain(feed_url)
        domains = self.org.get('domains', [''])
        if feed_domain:
            domains.append(feed_domain)

        # iterate through RSS entries.
        self.log.info('Fetching {}'.format(feed_url))
        for article in get_feed(feed_url, domains):
            article['type'] = 'article'  # set this type as article.

            # since we poll often, we can assume this is a good
            # approximation of an article publish date.
            if not article.get('created'):
                article['created'] = dates.now()

            # if we haven't run, just yield all results.
            if not self.max_date_last_run:
                self.publish_dates.append(article['created'])
                yield article

            # only yield new articles
            elif article['created'] > self.max_date_last_run:
                self.publish_dates.append(article['created'])
                yield article
Example 7
 def __init__(self, **kw):
     self.name = kw.get("name")
     self.email = kw.get("email")
     self.set_password(kw.get("password"))
     self.created = kw.get("created", dates.now())
     self.admin = kw.get("admin", kw.get("super_user", False))  # super users are also admins.
     self.super_user = kw.get("super_user", False)
     self.set_apikey(**kw)
Example 8
 def __init__(self, **kw):
     self.name = kw.get('name')
     self.email = kw.get('email')
     self.set_password(kw.get('password'))
     self.created = kw.get('created', dates.now())
     self.admin = kw.get('admin', kw.get('super_user', False)) # super users are also admins.
     self.super_user = kw.get('super_user', False)
     self.set_apikey(**kw)
Example 9
 def remove_recipe(self, recipe):
     """
     Remove a scheduled job from the list of scheduled jobs.
     """
     print 'Removing: {} at {}'.format(recipe, dates.now())
     self._running_recipes.pop(recipe.id)
     gevent.kill(self._greenlets[recipe.id])
     self._greenlets.pop(recipe.id)
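
remove_recipe assumes every running recipe has a matching greenlet in self._greenlets. The spawning side is not shown in these examples; a minimal sketch, assuming gevent and a run_recipe loop like the one in a later example:

    def spawn_recipe(self, recipe):
        # hypothetical counterpart to remove_recipe: run the recipe loop in
        # a greenlet and index it by recipe id so it can be killed later.
        self._running_recipes[recipe.id] = recipe
        self._greenlets[recipe.id] = gevent.spawn(self.run_recipe, recipe)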
Example 10
 def get_created(self, obj):
     """
     return earliest time of candidates or current time.
     """
     candidates = self.get_candidates(obj, DATE_CANDIDATE_JSONPATH)
     if len(candidates) > 0:
         return dates.from_struct_time(sorted(candidates)[0])
     else:
         return dates.now()
Example 11
 def setup(self):
     """
     parse max age argument.
     """
     max_age = self.options.get('max_age')
     if max_age:
         self.max_age = dates.now() - timedelta(days=max_age)
     else:
         self.max_age = datetime(1900, 1, 1, tzinfo=pytz.UTC)
Example 12
    def run(self):
        d = dates.now() - timedelta(days=self.options.get('days', 7))
        results = self.api.events.search(statuses='deleted',
                                         updated_before=d.isoformat(),
                                         per_page=100,
                                         fields='id')

        for event in results.get('events', []):
            self.api.events.delete(event['id'], force=True)
Example 13
 def __init__(self, **kw):
     self.name = kw.get('name')
     self.email = kw.get('email')
     self.set_password(kw.get('password'))
     self.created = kw.get('created', dates.now())
     self.admin = kw.get('admin',
                         kw.get('super_user',
                                False))  # super users are also admins.
     self.super_user = kw.get('super_user', False)
     self.set_apikey(**kw)
Example 14
    def run(self):
        d = dates.now() - timedelta(days=self.options.get('days', 7))
        results = self.api.events.search(
            statuses='deleted',
            updated_before=d.isoformat(),
            per_page=100,
            fields='id')

        for event in results.get('events', []):
            self.api.events.delete(event['id'], force=True)
Example 15
def url_for_job_status(**kw):
    """
    Generate a URL for a job status.
    """
    # add context
    kw['orig_url'] = request.url
    kw['started'] = dates.now().isoformat()
    path = url_for('jobs.get_status', **kw)
    kw['status_url'] = urljoin(settings.API_URL, path)
    return kw
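
Because 'jobs.get_status' presumably declares only the job id as a route variable, Flask's url_for serializes the remaining keyword arguments (queue, started, orig_url) into the query string; that is how the get_status endpoint shown later recovers them from request.args. An illustration, assuming a hypothetical route /jobs/<job_id>/status:

    kw = url_for_job_status(job_id='abc123', queue='recipes')
    # kw['status_url'] would resemble:
    #   http://localhost:5000/jobs/abc123/status?queue=recipes&orig_url=...&started=...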
Example 16
def fb_extend_oauth_token(temp_access_token):
    url = _graph_url + "oauth/access_token"
    params = {
        "grant_type": "fb_exchange_token",
        "client_id": settings.FACEBOOK_APP_ID,
        "client_secret": settings.FACEBOOK_APP_SECRET,
        "fb_exchange_token": temp_access_token,
    }
    r = requests.get(url=url, params=params)
    token = parse_utf8_qsl(r.content)
    token["expires"] = dates.parse_ts(dates.now(ts=True) + int(token["expires"])).isoformat()
    return token
Example 17
def fb_extend_oauth_token(temp_access_token):
    url = _graph_url + "oauth/access_token"
    params = {
        'grant_type': 'fb_exchange_token',
        'client_id': settings.FACEBOOK_APP_ID,
        'client_secret': settings.FACEBOOK_APP_SECRET,
        'fb_exchange_token': temp_access_token
    }
    r = requests.get(url=url, params=params)
    token = parse_utf8_qsl(r.content)
    token['expires'] = dates.parse_ts(
        dates.now(ts=True) + int(token['expires'])).isoformat()
    return token
Example 18
 def dispatch(self, msg, **kw):
     self.server.outbox.login()
     kw = {
         'subject': "{} <{}> {}".format(
             settings.NOTIFY_EMAIL_SUBJECT_PREFIX,
             kw.get('subject', 'none'),
             dates.now().isoformat()),
         'body': msg,
         'to_': kw.get('to_', ",".join(settings.NOTIFY_EMAIL_RECIPIENTS)),
         'from_': kw.get('from_', settings.MAIL_USERNAME)
     }
     self.server.outbox.send(**kw)
     self.server.outbox.logout()
Example 19
def content_timeseries_to_summary(org, num_hours=24):
    """
    Roll up content-timeseries metrics into summaries.
    Optimize this query by only updating content items whose
    timeseries have been updated in the last X hours.
    """

    # just use this to generate a giant timeseries select with computed
    # metrics.
    ts = QueryContentMetricTimeseries(org, org.content_item_ids)

    # generate aggregation statements + list of metric names.
    summary_pattern = "{agg}({name}) AS {name}"
    select_statements = []
    metrics = []
    for n, m in org.content_timeseries_metric_rollups.items():
        ss = summary_pattern.format(**m)
        select_statements.append(ss)
        metrics.append(n)

    qkw = {
        'select_statements': ",\n".join(select_statements),
        'metrics': ", ".join(metrics),
        'org_id': org.id,
        'last_updated': (dates.now() - timedelta(hours=num_hours)).isoformat(),
        'ts_query': ts.query
    }

    q = """SELECT upsert_content_metric_summary({org_id}, content_item_id, metrics::text)
           FROM  (
              SELECT
                content_item_id,
                (SELECT row_to_json(_) from (SELECT {metrics}) as _) as metrics
              FROM (
                 SELECT
                    content_item_id,
                    {select_statements}
                FROM ({ts_query}) zzzz
                WHERE content_item_id in (
                    SELECT
                        distinct(content_item_id)
                    FROM content_metric_timeseries
                    WHERE updated > '{last_updated}'
                    )
                GROUP BY content_item_id
                ) t1
            ) t2
        """.format(**qkw)
    db.session.execute(q)
    db.session.commit()
    return True
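
Each rollup in org.content_timeseries_metric_rollups is expected to carry at least agg and name keys; a hypothetical entry shows what summary_pattern renders:

    summary_pattern = "{agg}({name}) AS {name}"
    m = {'agg': 'sum', 'name': 'twitter_shares'}  # hypothetical rollup entry
    print summary_pattern.format(**m)  # sum(twitter_shares) AS twitter_shares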
Example 20
 def remove_recipe(self, recipe, **kw):
     """
     Remove a scheduled job from the list of scheduled jobs.
     """
     
     if kw.get('log', True):
         msg = 'Removing {} recipe ({} / {}) at {}'\
             .format(recipe.schedule_by, recipe.id, recipe.slug, dates.now())
         self.log(msg)
     self._running_recipes.pop(recipe.id)
     self._running_recipes.pop('{}:reset'.format(recipe.id))
     greenlet = self._greenlets.pop(recipe.id)
     if greenlet:
         gevent.kill(greenlet)
Example 21
    def get(self, *args, **kw):
        """
        The main get/cache function.
        """
        # get a custom ttl, fallback on default
        ttl = kw.pop('ttl', self.ttl)

        # format the key
        key = self.format_key(*args, **kw)

        # last modified key
        lm_key = "{}:last_modified".format(key)

        # attempt to get the object from redis
        if not self.debug:
            obj = self.redis.get(key)
        else:
            obj = None

        # if it doesn't exist, proceed with work
        if not obj:

            # not cached
            is_cached = False

            obj = self.work(*args, **kw)

            # if the worker returns None, break out
            if not obj:
                return CacheResponse(key, obj, None, False)

            # set the object in redis at the specified
            # key with the specified ttl
            self.redis.set(key, self.serialize(obj), ex=ttl)

            # set the last modified time
            last_modified = dates.now()
            self.redis.set(lm_key, last_modified.isoformat(), ex=ttl)

        else:
            # is cached
            is_cached = True

            # if it does exist, deserialize it.
            obj = self.deserialize(obj)

            # get the cached last modified time
            last_modified = dates.parse_iso(self.redis.get(lm_key))

        return CacheResponse(key, obj, last_modified, is_cached)
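
A minimal usage sketch: get calls format_key and work with the caller's arguments and round-trips objects through serialize and deserialize, so a subclass only supplies those four pieces. The names below (Cache, TwitterCache, the constructor arguments) are assumptions, not part of the source:

    import json

    class TwitterCache(Cache):

        def format_key(self, screen_name):
            return 'twitter:{}'.format(screen_name)

        def work(self, screen_name):
            # the expensive fetch would go here; illustrative stub.
            return {'screen_name': screen_name, 'followers': 100}

        def serialize(self, obj):
            return json.dumps(obj)

        def deserialize(self, s):
            return json.loads(s)

    # cache = TwitterCache(redis=redis_conn, ttl=3600, debug=False)
    # res = cache.get('newslynx')
    # res.obj, res.last_modified and res.is_cached mirror CacheResponse above.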
Example 22
 def __init__(self, **kw):
     self.source_id = str(kw.get('source_id'))
     self.recipe_id = kw.get('recipe_id')
     self.org_id = kw.get('org_id')
     self.status = kw.get('status', 'pending')
     self.provenance = kw.get('provenance', 'recipe')
     self.url = kw.get('url')
     self.img_url = kw.get('img_url')
     self.thumbnail = kw.get('thumbnail')
     self.created = kw.get('created', dates.now())
     self.title = kw.get('title')
     self.description = kw.get('description')
     self.body = kw.get('body')
     self.authors = kw.get('authors', [])
     self.meta = kw.get('meta', {})
Example 23
def content_summary_from_content_timeseries(org,
                                            content_item_ids=[],
                                            num_hours=24):
    """
    Roll up content-timeseries metrics into summaries.
    Optimize this query by only updating content items
    which have had updates to their metrics in the last X hours.
    """

    # just use this to generate a giant timeseries select with computed
    # metrics.
    ts = QueryContentMetricTimeseries(org, content_item_ids, unit=None)
    ts.compute = False
    metrics, ss = _summary_select(org.content_timeseries_metric_rollups)

    qkw = {
        'select_statements': ss,
        'metrics': metrics,
        'org_id': org.id,
        'last_updated': (dates.now() - timedelta(hours=num_hours)).isoformat(),
        'ts_query': ts.query,
    }

    q = """SELECT upsert_content_metric_summary({org_id}, content_item_id, metrics::text)
           FROM  (
              SELECT
                content_item_id,
                (SELECT row_to_json(_) from (SELECT {metrics}) as _) as metrics
              FROM (
                 SELECT
                    content_item_id,
                    {select_statements}
                FROM ({ts_query}) zzzz
                WHERE zzzz.content_item_id in (
                    SELECT
                        distinct(content_item_id)
                    FROM content_metric_timeseries
                    WHERE updated > '{last_updated}'
                    )
                GROUP BY content_item_id
                ) t1
            ) t2
        """.format(**qkw)
    db.session.execute(q)
    db.session.commit()
    return True
Example 24
 def __init__(self, **kw):
     self.org_id = kw.get('org_id')
     self.recipe_id = kw.get('recipe_id')
     self.url = kw.get('url')
     self.type = kw.get('type')
     self.provenance = kw.get('provenance', 'recipe')
     self.domain = kw.get('domain')
     self.created = kw.get('created', dates.now())
     self.site_name = kw.get('site_name')
     self.favicon = kw.get('favicon')
     self.img_url = kw.get('img_url')
     self.thumbnail = kw.get('thumbnail')
     self.title = kw.get('title')
     self.description = kw.get('description')
     self.body = kw.get('body')
     self.active = kw.get('active', True)
     self.meta = kw.get('meta', {})
Example 25
    def cook(self, recipe):
        """
        Cook a recipe.
        """
        msg = 'Cooking recipe ({} / {}) at {}'\
            .format(recipe.id, recipe.slug, dates.now())
        self.log(msg)

        # api connection.
        api = API(apikey=recipe.user.apikey, org=recipe.org_id)

        # cook the recipe
        job = api.recipes.cook(recipe.id)
        self.log('Job ID: {job_id}'.format(**job))

        # poll the job's status
        for res in api.jobs.poll(**job):
            self.log(res)
Example 26
def content_summary_from_content_timeseries(org, content_item_ids=[], num_hours=24):
    """
    Roll up content-timeseries metrics into summaries.
    Optimize this query by only updating content items
    which have had updates to their metrics in the last X hours.
    """

    # just use this to generate a giant timeseries select with computed
    # metrics.
    ts = QueryContentMetricTimeseries(org, content_item_ids, unit=None)
    ts.compute = False
    metrics, ss = _summary_select(org.content_timeseries_metric_rollups)

    qkw = {
        'select_statements': ss,
        'metrics': metrics,
        'org_id': org.id,
        'last_updated': (dates.now() - timedelta(hours=num_hours)).isoformat(),
        'ts_query': ts.query,
    }

    q = """SELECT upsert_content_metric_summary({org_id}, content_item_id, metrics::text)
           FROM  (
              SELECT
                content_item_id,
                (SELECT row_to_json(_) from (SELECT {metrics}) as _) as metrics
              FROM (
                 SELECT
                    content_item_id,
                    {select_statements}
                FROM ({ts_query}) zzzz
                WHERE zzzz.content_item_id in (
                    SELECT
                        distinct(content_item_id)
                    FROM content_metric_timeseries
                    WHERE updated > '{last_updated}'
                    )
                GROUP BY content_item_id
                ) t1
            ) t2
        """.format(**qkw)
    db.session.execute(q)
    db.session.commit()
    return True
Example 27
def test_bulk_org_timeseries(nrows=1000):
    """
    Test bulk loading org timeseries metrics.
    """
    start = time.time()
    data = []
    for i in xrange(nrows):
        hours = i
        data.append({
            'metrics': {'ga_pageviews': i},
            'datetime': (dates.now() - timedelta(days=30, hours=hours)).isoformat()
        })

    # make request and return status url
    res = api.orgs.bulk_create_timeseries(data=data)
    poll_status_url(res.get('status_url'))
    end = time.time()
    print "Bulk Loading {} Org Timeseries Metrics Took {} seconds"\
        .format(nrows, round((end-start), 2))
Example 28
    def run_recipe(self, recipe, daily=False):
        """
        Run a scheduled recipe indefinitely
        """
        if daily:
            time_of_day = dates.parse_time_of_day(recipe.time_of_day)
            seconds_until = dates.seconds_until(time_of_day)
            time.sleep(seconds_until)
            # one day in seconds
            interval = 24 * 60 * 60

        else:
            interval = copy.copy(recipe.interval)

        while True:
            print 'Running: {} at {}'.format(recipe, dates.now())
            api = API(apikey=recipe.user.apikey, org=recipe.org_id)
            # api.recipes.run(recipe.id)
            time.sleep(interval)
Example 29
 def _parse(self, raw):
     """
     pre-process a raw message.
     """
     # parse the raw message
     msg = email.message_from_string(raw)
     # normalize
     clean = {}
     rec_parts = msg['Received'].split(';')
     if len(rec_parts) > 1:
         clean['datetime'] = dates.parse_any(rec_parts[-1].strip())
     else:
         clean['datetime'] = dates.now()
     clean['from'] = msg['from'].replace('<', '').replace('>', '')
     clean['to'] = msg['to'].replace('<', '').replace('>', '').strip()
     clean['subject'] = msg['subject'].strip()
     clean['body'] = msg.as_string()
     # return
     return clean
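
_parse leans on the stdlib email module; a small illustration of the Received-header handling it relies on (the message itself is made up):

    import email

    raw = ("Received: from mail.example.com;\n"
           " Mon, 01 Jun 2015 12:00:00 +0000\n"
           "From: <reader@example.com>\n"
           "To: <inbox@example.com>\n"
           "Subject: a story tip\n"
           "\n"
           "body text\n")
    msg = email.message_from_string(raw)
    # msg['Received'].split(';')[-1].strip()
    # -> 'Mon, 01 Jun 2015 12:00:00 +0000',
    # which dates.parse_any converts to the message's datetime.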
Example 30
def test_bulk_content_timeseries(nrows=10000):
    """
    Test bulk loading timeseries metrics
    """
    start = time.time()
    content_item_ids = [r['id'] for r in api.orgs.simple_content()]
    data = []
    for i in xrange(nrows):
        hours = nrows - i
        data.append({
            'content_item_id': choice(content_item_ids),
            'datetime': (dates.now() - timedelta(days=30, hours=hours)).isoformat(),
            'metrics': {'twitter_shares': i}
        })

    # make request and return status url
    res = api.content.bulk_create_timeseries(data)
    poll_status_url(res.get('status_url'))
    end = time.time()
    print "Bulk Loading {} Content Timeseries Metrics Took {} seconds"\
        .format(nrows, round((end-start), 2))
Example 31
def gen_content_metric_timeseries(org,
                                  content_items,
                                  metrics,
                                  n_content_item_timeseries_metrics=1000):
    # build a list of hourly datetimes covering the past 7 days.
    date_list = []
    start = dates.now() - timedelta(days=7)
    for hour in range(1, (7 * 24) + 1):
        date_list.append(start + timedelta(hours=hour))

    for c in content_items:
        last_values = {}
        for i, d in enumerate(date_list):
            _metrics = {}
            for m in metrics:
                if 'timeseries' in m.content_levels:
                    if m.type == 'cumulative':
                        if m.name not in last_values:
                            last_values[m.name] = 0
                        last_values[m.name] += random_int(0, 100)
                        _metrics[m.name] = copy.copy(last_values[m.name])
                    else:
                        _metrics[m.name] = random_int(1, 1000)

            cmd_kwargs = {
                'org_id': org.id,
                'content_item_id': c.id,
                'datetime': d.isoformat(),
                'metrics': obj_to_json(_metrics)
            }
            # upsert command
            cmd = """SELECT upsert_content_metric_timeseries(
                        {org_id},
                        {content_item_id},
                        '{datetime}',
                        '{metrics}');
                   """.format(**cmd_kwargs)
            db.session.execute(cmd)
    db.session.commit()
Example 32
def test_bulk_org_timeseries(nrows=1000):
    """
    Test bulk loading org timeseries metrics.
    """
    start = time.time()
    data = []
    for i in xrange(nrows):
        hours = i
        data.append({
            'metrics': {
                'ga_pageviews': i
            },
            'datetime':
            (dates.now() - timedelta(days=30, hours=hours)).isoformat()
        })

    # make request and return status url
    res = api.orgs.bulk_create_timeseries(data=data)
    poll_status_url(res.get('status_url'))
    end = time.time()
    print "Bulk Loading {} Org Timeseries Metrics Took {} seconds"\
        .format(nrows, round((end-start), 2))
Example 33
def gen_content_metric_timeseries(org, content_items, metrics, n_content_item_timeseries_metrics=1000):
    # build a list of hourly datetimes covering the past 7 days.
    date_list = []
    start = dates.now() - timedelta(days=7)
    for hour in range(1, (7*24)+1):
        date_list.append(start + timedelta(hours=hour))

    for c in content_items:
        last_values = {}
        for i, d in enumerate(date_list):
            _metrics = {}
            for m in metrics:
                if 'timeseries' in m.content_levels:
                    if m.type == 'cumulative':
                        if m.name not in last_values:
                            last_values[m.name] = 0
                        last_values[m.name] += random_int(0, 100)
                        _metrics[m.name] = copy.copy(last_values[m.name])
                    else:
                        _metrics[m.name] = random_int(1, 1000)

            cmd_kwargs = {
                'org_id': org.id,
                'content_item_id': c.id,
                'datetime': d.isoformat(),
                'metrics': obj_to_json(_metrics)
            }
            # upsert command
            cmd = """SELECT upsert_content_metric_timeseries(
                        {org_id},
                        {content_item_id},
                        '{datetime}',
                        '{metrics}');
                   """.format(**cmd_kwargs)
            db.session.execute(cmd)
    db.session.commit()
Example 34
 def run(self, **kw):
     """
     Endlessly run and update scheduled recipes.
     """
     interval = float(kw.get('interval', settings.SCHEDULER_REFRESH_INTERVAL))
     msg = 'Starting Scheduler at {} with refresh interval of {} seconds'\
         .format(dates.now(), interval)
     self.log(msg)
     while True:
         self.set_session()
         self.update_scheduled_recipes()
         self.run_scheduled_recipes()
         time.sleep(interval)
         self.session.flush()
Example 35
def get_status(user, job_id):
    """
    Get the status of a queued job.
    """

    # parse args.
    queue = request.args.get('queue')
    if not queue:
        raise RequestError(
            'You must pass in the queue name to fetch a job\'s status')

    if queue not in queues:
        raise RequestError('"{}" is not a valid queue.'.format(queue))

    q = queues.get(queue)
    job = q.fetch_job(job_id)
    if not job:
        raise RequestError('A job with ID {} does not exist'.format(job_id))

    # fetch metadata about this job from the request args.
    started = request.args.get('started')
    orig_url = request.args.get('orig_url')

    if started:
        started = dates.parse_iso(started)

    # format return value
    ret = {
        'job_id': job_id,
        'queue': queue,
        'status': None,
        'started': started,
        'orig_url': orig_url
    }

    # determine time since start
    if started:
        ret['time_since_start'] = int((dates.now() - started).total_seconds())

    # determine status
    if job.is_queued:
        ret['status'] = 'queued'

    if job.is_started:
        ret['status'] = 'running'

    if job.is_failed:
        ret['status'] = 'error'
        ret['message'] = "An unknown error occurred."

    if job.is_finished:
        rv = job.return_value

        # job will return true if successful
        if rv is True:
            ret['status'] = 'success'

        # job will return an error if unsuccessful
        else:
            ret['status'] = 'error'
            ret['message'] = rv.message

    return jsonify(ret)
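
For a job that finished successfully, the serialized response would look roughly like this (all values illustrative):

    {
        "job_id": "abc123",
        "queue": "recipes",
        "status": "success",
        "started": "2015-06-01T12:00:00+00:00",
        "orig_url": "http://localhost:5000/api/v1/recipes/1/cook",
        "time_since_start": 42
    }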
Example 36
 def setup(self):
     max_age = self.options.get('max_age')
     self.max_age = dates.now() - timedelta(days=max_age)
Example 37
 def age(self):
     if self.is_cached:
         return int((dates.now() - self.last_modified).total_seconds())
     return 0
Example 38
def get_status(user, job_id):
    """
    Get the status of a queued job.
    """

    # parse args.
    queue = request.args.get('queue')
    if not queue:
        raise RequestError(
            'You must pass in the queue name to fetch a job\'s status')

    if queue not in queues:
        raise RequestError(
            '"{}" is not a valid queue.'
            .format(queue))

    q = queues.get(queue)
    job = q.fetch_job(job_id)
    if not job:
        raise RequestError(
            'A job with ID {} does not exist'
            .format(job_id))

    # fetch metadata about this job from the request args.
    started = request.args.get('started')
    orig_url = request.args.get('orig_url')

    if started:
        started = dates.parse_iso(started)

    # format return value
    ret = {
        'job_id': job_id,
        'queue': queue,
        'status': None,
        'started': started,
        'orig_url': orig_url
    }

    # determine time since start
    if started:
        ret['time_since_start'] = int((dates.now() - started).total_seconds())

    # determine status
    if job.is_queued:
        ret['status'] = 'queued'

    if job.is_started:
        ret['status'] = 'running'

    if job.is_failed:
        ret['status'] = 'error'
        ret['message'] = "An unknown error occurred."

    if job.is_finished:
        rv = job.return_value

        # job will return true if successful
        if rv is True:
            ret['status'] = 'success'

        # job will return an error if unsuccessful
        else:
            ret['status'] = 'error'
            ret['message'] = str(rv.message)

    return jsonify(ret)
Example 39
def run(sous_chef_path, recipe_id, kw_key, **kw):
    """
    Do the work. This exists outside the class
    in order to enable pickling for the task queue.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        if kw_key:
            # load in kwargs
            kw = rds.get(kw_key)
            if not kw:
                raise InternalServerError(
                    'An unexpected error occurred while attempting to run a Sous Chef.'
                )
            kw = pickle_to_obj(kw)
            # delete them.
            rds.delete(kw_key)

        # import sous chef
        SousChef = sc_exec.from_import_path(sous_chef_path)

        # initialize it with kwargs
        kw['org'] = db.session\
            .query(Org).get(recipe.org.id)\
            .to_dict(incl_domains=True)
        kw['recipe'] = recipe.to_dict()
        sous_chef = SousChef(**kw)

        # indicate that the job is running
        if not kw.get('passthrough', False):
            recipe.status = 'running'
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sous_chef.cook()

        # passthrough the data.
        if kw.get('passthrough', False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sous_chef.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if sous_chef.next_job:
            recipe.last_job = sous_chef.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception:

        # always delete the kwargs.
        if kw_key:
            rds.delete(kw_key)

        if not kw.get('passthrough', False):
            db.session.rollback()
            recipe.status = "error"
            recipe.traceback = format_exc()
            recipe.last_run = dates.now()
            db.session.add(recipe)
            db.session.commit()

            # notification
            tb = format_exc()
            error_notification(recipe, tb)
            return MerlynneError(tb)

        raise MerlynneError(format_exc())
Example 40
 def add_recipe(self, recipe):
     """
     Add a scheduled recipe to the list of scheduled recipes.
     """
     print 'Adding {} at {}'.format(recipe, dates.now())
     self._running_recipes[recipe.id] = recipe