コード例 #1
0
def org_timeseries(obj, org_id=None, metrics_lookup=None, commit=True):
    """
    Ingest Timeseries Metrics for an organization.

    The timestamp comes from the "datetime" key of ``obj`` when present
    (the key is popped from ``obj``), otherwise from the current time. In
    both cases it is passed through the ``dates`` floor helpers with
    ``unit="hour", value=1`` and stored as an ISO string.

    :param obj: dict of raw metrics, optionally holding a "datetime" key.
    :param org_id: id interpolated into the upsert statement.
    :param metrics_lookup: forwarded to ``ingest_util.prepare_metrics``.
    :param commit: when truthy, run the statement on ``db.session``.
    :returns: the dict of statement arguments (with the prepared metrics
        added) when ``commit`` is truthy, otherwise the SQL string.
    :raises RequestError: if the datetime cannot be parsed or the
        statement fails to execute.
    """
    if "datetime" in obj:
        raw_datetime = obj.pop("datetime")
        parsed = dates.parse_iso(raw_datetime)
        # A falsy parse result would otherwise blow up inside dates.floor
        # with an unrelated error; report it as a bad request instead.
        if not parsed:
            raise RequestError('"{}" is an invalid datetime.'.format(raw_datetime))
        floored = dates.floor(parsed, unit="hour", value=1)
    else:
        floored = dates.floor_now(unit="hour", value=1)

    cmd_kwargs = {"org_id": org_id, "datetime": floored.isoformat()}

    metrics = ingest_util.prepare_metrics(obj, metrics_lookup, valid_levels=["org", "all"], check_timeseries=True)

    # upsert command
    # NOTE(review): values are interpolated straight into the SQL text and
    # the raw string is part of the return contract (commit=False), so it
    # is left as-is here -- confirm the inputs are trusted / pre-validated.
    cmd = """SELECT upsert_org_metric_timeseries(
                 {org_id},
                '{datetime}',
                '{metrics}')
           """.format(
        metrics=obj_to_json(metrics), **cmd_kwargs
    )

    if not commit:
        return cmd

    try:
        db.session.execute(cmd)
    except Exception as err:
        # `message` is not guaranteed to exist on an exception (it was
        # dropped from BaseException in Python 3); fall back to str().
        raise RequestError(getattr(err, "message", None) or str(err))
    cmd_kwargs["metrics"] = metrics
    return cmd_kwargs
コード例 #2
0
ファイル: rss_feed.py プロジェクト: jjelosua/newslynx-core
 def setup(self):
     # get max publish date of the last time we ran this.
     max_date_last_run = self.last_job.get('max_date_last_run', None)
     if max_date_last_run:
         self.max_date_last_run = dates.parse_iso(max_date_last_run)
     else:
         self.max_date_last_run = None
コード例 #3
0
ファイル: feed.py プロジェクト: newslynx/newslynx-sc-rss
 def setup(self):
     # get max publish date of the last time we ran this.
     max_date_last_run = self.last_job.get('max_date_last_run', None)
     if max_date_last_run:
         self.max_date_last_run = dates.parse_iso(max_date_last_run)
     else:
         self.max_date_last_run = None
     self.log.info('Got max date: {}'.format(self.max_date_last_run))
     self.publish_dates = []
コード例 #4
0
 def valid_datetime(self, key, opt):
     """
     Check that an option parses as an iso-datetime.

     Returns the parsed value when parsing succeeds; otherwise returns
     (does not raise) a RecipeSchemaError describing the problem.
     """
     parsed = dates.parse_iso(opt)
     if parsed:
         return parsed
     # NOTE(review): the error instance is handed back rather than raised;
     # presumably callers inspect the return value -- confirm.
     message = "{} should be a 'datetime' field but was passed '{}'.".format(
         key, opt)
     return RecipeSchemaError(message)
コード例 #5
0
 def valid_datetime(self, key, opt):
     """
     Validate an option that should hold an iso-datetime.

     The parsed value is returned on success. On failure a
     RecipeSchemaError instance is returned to the caller, not raised.
     """
     result = dates.parse_iso(opt)
     if not result:
         # NOTE(review): returned, not raised -- verify that callers
         # check for an error object in the return value.
         template = "{} should be a 'datetime' field but was passed '{}'."
         result = RecipeSchemaError(template.format(key, opt))
     return result
コード例 #6
0
def arg_date(name, default=None):
    """
    Read a query argument and parse it as an iso datetime.

    Returns ``default`` when the argument is absent or empty, and raises
    RequestError when a value is present but does not parse.
    """
    raw = request.args.get(name, '')
    if len(raw) == 0:
        return default

    parsed = dates.parse_iso(raw)
    if parsed:
        return parsed

    raise RequestError(
        'Invalid value for "{}". '
        'It should be an iso8601 datetime string.'.format(name))
コード例 #7
0
ファイル: util.py プロジェクト: jjelosua/newslynx-core
def arg_date(name, default=None):
    """
    Fetch a query argument as a parsed iso datetime.

    An absent or empty argument yields ``default``; a value that fails
    to parse raises RequestError.
    """
    text = request.args.get(name, '')
    if len(text) == 0:
        return default

    value = dates.parse_iso(text)
    if not value:
        message = ('Invalid value for "{}". '
                   'It should be an iso8601 datetime string.'
                   .format(name))
        raise RequestError(message)
    return value
コード例 #8
0
ファイル: util.py プロジェクト: jjelosua/newslynx-core
def arg_list(name, default=None, typ=str, exclusions=False):
    """
    Get a comma-separated list of args, asserting a type.

    Each element may carry a '!' or '-' prefix marking it as an exclusion;
    the prefix is always stripped before the element is converted.

    :param name: name of the argument, read via ``arg_str``.
    :param default: returned when the argument is missing or empty.
    :param typ: callable used to convert each element. When the type's
        string form contains 'date' the element is parsed with
        ``dates.parse_iso`` instead.
    :param exclusions: when truthy, prefixed elements are collected
        separately and a tuple is returned.
    :returns: a list of converted values, or ``(include, exclude)`` lists
        when ``exclusions`` is truthy. For a missing/empty argument:
        ``default``, or ``(default, default)`` when ``exclusions`` is truthy.
    :raises RequestError: if any element cannot be converted.
    """
    type_string = str(typ).split("'")[1]
    include_values = []
    exclude_values = []

    vv = arg_str(name, default=None)
    if not vv:
        if exclusions:
            return default, default
        return default

    for i, value in enumerate(vv.split(','), start=1):

        # NOTE(review): the prefix is stripped even when `exclusions` is
        # falsy, so "-5" converts as "5" -- confirm this is intended.
        exclude = value.startswith(('!', '-'))
        if exclude:
            value = value[1:]

        try:
            if 'date' in type_string:
                v = dates.parse_iso(value)
                if not v:
                    raise ValueError(value)
            else:
                v = typ(value)

        except Exception:
            # All four placeholders are filled by a single format() call;
            # previously only `value` was passed, which raised IndexError
            # instead of producing the intended RequestError.
            raise RequestError(
                '"{}", element  {} of "{}" is invalid. '
                'It should be a {} type.'
                .format(value, i, name, type_string))

        if exclusions and exclude:
            exclude_values.append(v)
        else:
            include_values.append(v)

    if exclusions:
        return include_values, exclude_values

    return include_values
コード例 #9
0
def arg_list(name, default=None, typ=str, exclusions=False):
    """
    Get a comma-separated list of args, asserting a type.

    Elements prefixed with '!' or '-' are treated as exclusions; the
    prefix is always removed before conversion.

    :param name: name of the argument, read via ``arg_str``.
    :param default: returned when the argument is missing or empty.
    :param typ: callable applied to each element. If the type's string
        form contains 'date', ``dates.parse_iso`` is used instead.
    :param exclusions: when truthy, return included and excluded values
        as two separate lists.
    :returns: a list of converted values, or ``(include, exclude)`` lists
        when ``exclusions`` is truthy. For a missing/empty argument:
        ``default``, or ``(default, default)`` when ``exclusions`` is truthy.
    :raises RequestError: if any element cannot be converted.
    """
    type_string = str(typ).split("'")[1]
    is_date_type = 'date' in type_string
    include_values = []
    exclude_values = []

    vv = arg_str(name, default=None)
    if not vv:
        if exclusions:
            return default, default
        return default

    for i, value in enumerate(vv.split(','), start=1):

        exclude = value.startswith(('!', '-'))
        if exclude:
            value = value[1:]

        try:
            if is_date_type:
                v = dates.parse_iso(value)
                if not v:
                    raise ValueError(value)
            else:
                v = typ(value)

        # Catch conversion failures only; a bare `except:` would also
        # turn KeyboardInterrupt / SystemExit into a RequestError.
        except Exception:
            raise RequestError('"{}", element  {} of "{}" is invalid. '
                               'It should be a {} type.'.format(
                                   value, i, name, type_string))

        # Excluded values are only separated out when the caller asked
        # for exclusions; otherwise everything is an included value.
        if exclusions and exclude:
            exclude_values.append(v)
        else:
            include_values.append(v)

    if exclusions:
        return include_values, exclude_values

    return include_values
コード例 #10
0
ファイル: ingest_util.py プロジェクト: jjelosua/newslynx-core
def prepare_date(o, field):
    """
    Parse the date stored under ``field`` in ``o``.

    Returns None when the field is absent or explicitly None, and raises
    RequestError when a value is present but does not parse.
    """
    if field in o and o[field] is not None:
        parsed = dates.parse_iso(o[field])
        if parsed:
            return parsed
        raise RequestError(
            '{}: {} is an invalid date.'.format(field, o[field]))
    return None
コード例 #11
0
ファイル: cache.py プロジェクト: abelsonlive/newslynx-core
    def get(self, *args, **kw):
        """
        Look up a cached result, computing and storing it on a miss.

        A 'ttl' keyword, if given, is removed from ``kw`` and overrides
        ``self.ttl`` for this call. Returns a CacheResponse built from
        the key, the object, its last-modified time and a cached flag.
        """
        ttl = kw.pop('ttl', self.ttl)
        key = self.format_key(*args, **kw)
        lm_key = "{}:last_modified".format(key)

        # in debug mode redis is never consulted for the object
        cached = None if self.debug else self.redis.get(key)

        if cached:
            # cache hit: deserialize and read the stored timestamp.
            # NOTE(review): the timestamp key is stored separately, so it
            # may be missing here -- confirm parse_iso tolerates None.
            value = self.deserialize(cached)
            stamp = dates.parse_iso(self.redis.get(lm_key))
            return CacheResponse(key, value, stamp, True)

        # cache miss: do the work
        value = self.work(*args, **kw)

        # any falsy result is returned as-is and never stored
        if not value:
            return CacheResponse(key, value, None, False)

        # store the object and its last-modified time with the same ttl
        self.redis.set(key, self.serialize(value), ex=ttl)
        stamp = dates.now()
        self.redis.set(lm_key, stamp.isoformat(), ex=ttl)

        return CacheResponse(key, value, stamp, False)
コード例 #12
0
ファイル: ingest.py プロジェクト: newslynx/newslynx-core
def _prepare_metric_date(obj):
    """
    Build the floored iso timestamp for a metric.

    Uses the "datetime" key of ``obj`` when present (the key is popped),
    otherwise the current time. Unit and value for the flooring come
    from settings.
    """
    unit = settings.METRICS_MIN_DATE_UNIT
    step = settings.METRICS_MIN_DATE_VALUE

    if 'datetime' in obj:
        parsed = dates.parse_iso(obj.pop('datetime'), enforce_tz=True)
        floored = dates.floor(parsed, unit=unit, value=step)
    else:
        # no timestamp supplied: fall back to the current time
        floored = dates.floor_now(unit=unit, value=step)

    return floored.isoformat()
コード例 #13
0
 def run(self):
     """
     Yield share counts for content items.

     Items whose 'created' date is below ``self.max_age`` and items
     without a url are skipped. Each yielded dict is the share count
     data with 'url' removed and 'content_item_id' added.
     """
     for item in self.api.orgs.simple_content():
         too_old = dates.parse_iso(item['created']) < self.max_age
         if too_old:
             continue

         link = item.get('url')
         if not link:
             continue

         counts = shares.count(link)
         counts.pop('url', None)
         counts['content_item_id'] = item.get('id')
         yield counts
コード例 #14
0
ファイル: ingest_util.py プロジェクト: jjelosua/newslynx-core
def prepare_date(o, field):
    """
    Return the parsed date found at ``o[field]``.

    A missing field or a None value yields None; a value that cannot be
    parsed raises RequestError.
    """
    has_value = field in o and o[field] is not None
    if not has_value:
        return None

    parsed = dates.parse_iso(o[field])
    if parsed:
        return parsed

    message = '{}: {} is an invalid date.'.format(field, o[field])
    raise RequestError(message)
コード例 #15
0
    def get(self, *args, **kw):
        """
        Return a CacheResponse for the given arguments.

        The object is read from redis when available (never in debug
        mode); otherwise ``self.work`` computes it and the result is
        stored together with a last-modified timestamp. A 'ttl' keyword
        is popped from ``kw`` and overrides ``self.ttl``.
        """
        expires = kw.pop('ttl', self.ttl)
        key = self.format_key(*args, **kw)
        lm_key = "{}:last_modified".format(key)

        raw = self.redis.get(key) if not self.debug else None
        is_cached = bool(raw)

        if is_cached:
            obj = self.deserialize(raw)
            # NOTE(review): assumes the last-modified key is still present
            # whenever the object key is -- confirm.
            last_modified = dates.parse_iso(self.redis.get(lm_key))
        else:
            obj = self.work(*args, **kw)

            # falsy results short-circuit and are not written to redis
            if not obj:
                return CacheResponse(key, obj, None, False)

            # object first, then its timestamp, both with the same expiry
            self.redis.set(key, self.serialize(obj), ex=expires)
            last_modified = dates.now()
            self.redis.set(lm_key, last_modified.isoformat(), ex=expires)

        return CacheResponse(key, obj, last_modified, is_cached)
コード例 #16
0
ファイル: ingest.py プロジェクト: abelsonlive/newslynx-core
def _prepare_metric_date(obj):
    """
    Return a metric's timestamp as a floored iso string.

    The flooring unit/value are read from settings. When ``obj`` has no
    "datetime" key the current time is used; otherwise the key is popped
    from ``obj`` and parsed.
    """
    floor_kwargs = {
        'unit': settings.METRICS_MIN_DATE_UNIT,
        'value': settings.METRICS_MIN_DATE_VALUE,
    }

    # no timestamp supplied: use the current time
    if 'datetime' not in obj:
        return dates.floor_now(**floor_kwargs).isoformat()

    raw = obj.pop('datetime')
    parsed = dates.parse_iso(raw, enforce_tz=True)
    return dates.floor(parsed, **floor_kwargs).isoformat()
コード例 #17
0
def content_timeseries(obj,
                       org_id=None,
                       metrics_lookup=None,
                       content_item_ids=None,
                       commit=True):
    """
    Ingest Timeseries Metrics for a content item.

    "content_item_id" and (if present) "datetime" are popped from
    ``obj``. The timestamp -- or the current time when none is given --
    is passed through the ``dates`` floor helpers with
    ``unit='hour', value=1`` and stored as an ISO string.

    :param obj: dict of raw metrics holding a "content_item_id" key.
    :param org_id: id interpolated into the upsert statement.
    :param metrics_lookup: forwarded to ``ingest_util.prepare_metrics``.
    :param content_item_ids: collection of known content item ids.
    :param commit: when truthy, run the statement on ``db.session``.
    :returns: the SQL string (whether or not it was executed).
    :raises RequestError: if the content item id is missing or unknown,
        the datetime cannot be parsed, or the statement fails.
    """
    # A default is required here: without it a missing key raises
    # KeyError before the "missing" check below can ever run.
    content_item_id = obj.pop('content_item_id', None)
    if not content_item_id:
        raise RequestError('Object is missing a "content_item_id"')
    # `content_item_ids` defaults to None, which does not support `in`.
    if content_item_id not in (content_item_ids or ()):
        raise RequestError(
            'Content Item with ID {} doesnt exist'.format(content_item_id))

    # parse datetime.
    if 'datetime' in obj:
        raw_datetime = obj.pop('datetime')
        parsed = dates.parse_iso(raw_datetime)
        # A falsy parse result would otherwise fail inside dates.floor
        # with an unrelated error.
        if not parsed:
            raise RequestError(
                '"{}" is an invalid datetime.'.format(raw_datetime))
        floored = dates.floor(parsed, unit='hour', value=1)
    else:
        floored = dates.floor_now(unit='hour', value=1)

    cmd_kwargs = {
        "org_id": org_id,
        "content_item_id": content_item_id,
        "datetime": floored.isoformat(),
    }

    metrics = ingest_util.prepare_metrics(obj,
                                          metrics_lookup,
                                          valid_levels=['content_item', 'all'],
                                          check_timeseries=True)

    # upsert command
    # NOTE(review): values are interpolated straight into the SQL text and
    # the raw string is this function's return value, so it is left as-is
    # -- confirm the inputs are trusted / pre-validated.
    cmd = """SELECT upsert_content_metric_timeseries(
                {org_id},
                {content_item_id},
                '{datetime}',
                '{metrics}')
           """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            # `message` is not guaranteed to exist on an exception (it was
            # dropped from BaseException in Python 3); fall back to str().
            raise RequestError(getattr(err, 'message', None) or str(err))
    # NOTE(review): the SQL string is returned even after a commit. The
    # previous code stored the metrics on cmd_kwargs here without using
    # them -- confirm whether cmd_kwargs was meant to be returned.
    return cmd
コード例 #18
0
def content_timeseries(obj, org_id=None, metrics_lookup=None, content_item_ids=None, commit=True):
    """
    Ingest Timeseries Metrics for a content item.

    "content_item_id" and (if present) "datetime" are popped from
    ``obj``. The timestamp -- or the current time when none is given --
    is passed through the ``dates`` floor helpers with
    ``unit="hour", value=1`` and stored as an ISO string.

    :param obj: dict of raw metrics holding a "content_item_id" key.
    :param org_id: id interpolated into the upsert statement.
    :param metrics_lookup: forwarded to ``ingest_util.prepare_metrics``.
    :param content_item_ids: collection of known content item ids.
    :param commit: when truthy, run the statement on ``db.session``.
    :returns: the SQL string (whether or not it was executed).
    :raises RequestError: if the content item id is missing or unknown,
        the datetime cannot be parsed, or the statement fails.
    """
    # Pop with a default so a missing key reaches the check below rather
    # than raising KeyError.
    content_item_id = obj.pop("content_item_id", None)
    if not content_item_id:
        raise RequestError('Object is missing a "content_item_id"')
    # `content_item_ids` defaults to None, which does not support `in`.
    known_ids = content_item_ids or ()
    if content_item_id not in known_ids:
        raise RequestError("Content Item with ID {} doesnt exist".format(content_item_id))

    cmd_kwargs = {"org_id": org_id, "content_item_id": content_item_id}

    # parse datetime.
    if "datetime" not in obj:
        cmd_kwargs["datetime"] = dates.floor_now(unit="hour", value=1).isoformat()

    else:
        ds = obj.pop("datetime")
        dt = dates.parse_iso(ds)
        # A falsy parse result would otherwise fail inside dates.floor
        # with an unrelated error.
        if not dt:
            raise RequestError('"{}" is an invalid datetime.'.format(ds))
        cmd_kwargs["datetime"] = dates.floor(dt, unit="hour", value=1).isoformat()

    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup, valid_levels=["content_item", "all"], check_timeseries=True
    )

    # upsert command
    # NOTE(review): values are interpolated straight into the SQL text and
    # the raw string is this function's return value, so it is left as-is
    # -- confirm the inputs are trusted / pre-validated.
    cmd = """SELECT upsert_content_metric_timeseries(
                {org_id},
                {content_item_id},
                '{datetime}',
                '{metrics}')
           """.format(
        metrics=obj_to_json(metrics), **cmd_kwargs
    )

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            # `message` is not guaranteed to exist on an exception (it was
            # dropped from BaseException in Python 3); fall back to str().
            raise RequestError(getattr(err, "message", None) or str(err))
    # NOTE(review): the SQL string is returned even after a commit. The
    # previous code stored the metrics on cmd_kwargs here without using
    # them -- confirm whether cmd_kwargs was meant to be returned.
    return cmd
コード例 #19
0
def org_timeseries(obj, org_id=None, metrics_lookup=None, commit=True):
    """
    Ingest Timeseries Metrics for an organization.

    The timestamp comes from the 'datetime' key of ``obj`` when present
    (the key is popped from ``obj``), otherwise from the current time. In
    both cases it is passed through the ``dates`` floor helpers with
    ``unit='hour', value=1`` and stored as an ISO string.

    :param obj: dict of raw metrics, optionally holding a 'datetime' key.
    :param org_id: id interpolated into the upsert statement.
    :param metrics_lookup: forwarded to ``ingest_util.prepare_metrics``.
    :param commit: when truthy, run the statement on ``db.session``.
    :returns: the dict of statement arguments (with the prepared metrics
        added) when ``commit`` is truthy, otherwise the SQL string.
    :raises RequestError: if the datetime cannot be parsed or the
        statement fails to execute.
    """
    cmd_kwargs = {'org_id': org_id}

    # parse datetime.
    if 'datetime' not in obj:
        cmd_kwargs['datetime'] = dates.floor_now(unit='hour',
                                                 value=1).isoformat()

    else:
        ds = obj.pop('datetime')
        dt = dates.parse_iso(ds)
        # A falsy parse result would otherwise fail inside dates.floor
        # with an unrelated error; report it as a bad request instead.
        if not dt:
            raise RequestError(
                '"{}" is an invalid datetime.'.format(ds))
        cmd_kwargs['datetime'] = dates.floor(dt, unit='hour',
                                             value=1).isoformat()

    metrics = ingest_util.prepare_metrics(obj,
                                          metrics_lookup,
                                          valid_levels=['org', 'all'],
                                          check_timeseries=True)

    # upsert command
    # NOTE(review): values are interpolated straight into the SQL text and
    # the raw string is part of the return contract (commit=False), so it
    # is left as-is here -- confirm the inputs are trusted / pre-validated.
    cmd = """SELECT upsert_org_metric_timeseries(
                 {org_id},
                '{datetime}',
                '{metrics}')
           """.format(metrics=obj_to_json(metrics), **cmd_kwargs)

    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            # `message` is not guaranteed to exist on an exception (it was
            # dropped from BaseException in Python 3); fall back to str().
            raise RequestError(getattr(err, 'message', None) or str(err))
        cmd_kwargs['metrics'] = metrics
        return cmd_kwargs
    return cmd
コード例 #20
0
ファイル: meta.py プロジェクト: jjelosua/newslynx-core
def publish_date(soup, source_url=None):
    """
    Find a publish date in the page's meta tags or, failing that, in
    the source url.

    Returns the first value any parser accepts, or None when nothing
    matches.
    """
    # Parsers are tried in this order; each one is run against every
    # tag before the next parser gets a turn.
    parsers = (
        lambda text: dates.parse_iso(text, enforce_tz=False),
        lambda text: dates.parse_ts(text),
        lambda text: dates.parse_any(text, enforce_tz=False),
    )

    for parse in parsers:
        for tag in PUBLISH_DATE_TAGS:
            text = _extract_tag_data(soup, tag)
            if not text:
                continue
            found = parse(text)
            if found:
                return found

    # fallback on url regex
    if source_url:
        match = re_url_date.search(source_url)
        if match:
            found = dates.parse_any(match.group(0), enforce_tz=False)
            if found:
                return found

    return None
コード例 #21
0
ファイル: meta.py プロジェクト: jjelosua/newslynx-core
def publish_date(soup, source_url=None):
    """
    Extract a publish date from meta tags, falling back on the url.

    Tag data is tried as an iso date, then as a timestamp, then as any
    date; every tag is checked with one strategy before moving to the
    next. Returns None when no strategy yields a date.
    """
    strategies = [
        (dates.parse_iso, {'enforce_tz': False}),
        (dates.parse_ts, {}),
        (dates.parse_any, {'enforce_tz': False}),
    ]

    for parser, options in strategies:
        for tag in PUBLISH_DATE_TAGS:
            candidate = _extract_tag_data(soup, tag)
            parsed = parser(candidate, **options) if candidate else None
            if parsed:
                return parsed

    # last resort: look for a date embedded in the url
    url_match = re_url_date.search(source_url) if source_url else None
    if url_match:
        parsed = dates.parse_any(url_match.group(0), enforce_tz=False)
        if parsed:
            return parsed

    return None
コード例 #22
0
 def created(self):
     # The post's 'updated_time' is what gets reported here; a missing
     # or empty value yields None.
     has_time = self.post.get('updated_time')
     return dates.parse_iso(self.post['updated_time']) if has_time else None
コード例 #23
0
 def created(self):
     # Parsed from the post's 'updated_time'; None when that value is
     # missing or empty.
     if not self.post.get('updated_time'):
         return None
     raw_time = self.post['updated_time']
     return dates.parse_iso(raw_time)
コード例 #24
0
ファイル: jobs_api.py プロジェクト: abelsonlive/newslynx-core
def get_status(user, job_id):
    """
    Get the status of a queued job.

    Reads 'queue' (required), 'started' and 'orig_url' from the request's
    query string, fetches the job from the named queue and returns a
    JSON response describing it.

    :param user: not used inside this function.
    :param job_id: id passed to the queue's ``fetch_job``.
    :returns: the ``jsonify``-ed status dict.
    :raises RequestError: if 'queue' is missing or unknown, or if no job
        with ``job_id`` exists on that queue.
    """

    # parse args.
    queue = request.args.get('queue')
    if not queue:
        raise RequestError(
            'You must pass in the queue name to fetch a job\'s status')

    if queue not in queues:
        raise RequestError(
            '"{}" is not a valid queue.'
            .format(queue))

    q = queues.get(queue)
    job = q.fetch_job(job_id)
    if not job:
        raise RequestError(
            'A job with ID {} does not exist'
            .format(job_id))

    # metadata about this job, passed along in the query string.
    started = request.args.get('started')
    orig_url = request.args.get('orig_url')

    if started:
        started = dates.parse_iso(started)

    # format return value
    ret = {
        'job_id': job_id,
        'queue': queue,
        'status': None,
        'started': started,
        'orig_url': orig_url
    }

    # determine time since start
    if started:
        # `timedelta.seconds` is only the sub-day remainder and silently
        # drops whole days; total_seconds() covers the full duration.
        elapsed = dates.now() - started
        ret['time_since_start'] = int(elapsed.total_seconds())

    # determine status
    if job.is_queued:
        ret['status'] = 'queued'

    if job.is_started:
        ret['status'] = 'running'

    if job.is_failed:
        ret['status'] = 'error'
        ret['message'] = "An unknown error occurred."

    if job.is_finished:
        rv = job.return_value

        # job will return true if successful
        if rv is True:
            ret['status'] = 'success'

        # job will return an error if unsuccessful
        else:
            ret['status'] = 'error'
            # Not every return value carries a `message` attribute;
            # fall back to the value itself rather than failing here.
            message = getattr(rv, 'message', None)
            if message is None:
                message = rv
            ret['message'] = str(message)

    return jsonify(ret)
コード例 #25
0
def get_status(user, job_id):
    """
    Get the status of a queued job.

    Reads 'queue' (required), 'started' and 'orig_url' from the request's
    query string, fetches the job from the named queue and returns a
    JSON response describing it.

    :param user: not used inside this function.
    :param job_id: id passed to the queue's ``fetch_job``.
    :returns: the ``jsonify``-ed status dict.
    :raises RequestError: if 'queue' is missing or unknown, or if no job
        with ``job_id`` exists on that queue.
    """

    # parse args.
    queue = request.args.get('queue')
    if not queue:
        raise RequestError(
            'You must pass in the queue name to fetch a job\'s status')

    if queue not in queues:
        raise RequestError('"{}" is not a valid queue.'.format(queue))

    q = queues.get(queue)
    job = q.fetch_job(job_id)
    if not job:
        raise RequestError('A job with ID {} does not exist'.format(job_id))

    # metadata about this job, passed along in the query string.
    started = request.args.get('started')
    orig_url = request.args.get('orig_url')

    if started:
        started = dates.parse_iso(started)

    # format return value
    ret = {
        'job_id': job_id,
        'queue': queue,
        'status': None,
        'started': started,
        'orig_url': orig_url
    }

    # determine time since start
    if started:
        # `timedelta.seconds` is only the sub-day remainder and silently
        # drops whole days; total_seconds() covers the full duration.
        ret['time_since_start'] = int((dates.now() - started).total_seconds())

    # determine status
    if job.is_queued:
        ret['status'] = 'queued'

    if job.is_started:
        ret['status'] = 'running'

    if job.is_failed:
        ret['status'] = 'error'
        ret['message'] = "An unknown error occurred."

    if job.is_finished:
        rv = job.return_value

        # job will return true if successful
        if rv is True:
            ret['status'] = 'success'

        # job will return an error if unsuccessful
        else:
            ret['status'] = 'error'
            # Not every return value carries a `message` attribute;
            # fall back to its string form rather than failing here.
            message = getattr(rv, 'message', None)
            ret['message'] = str(rv) if message is None else message

    return jsonify(ret)