def setup(self):
    # get max publish date of the last time we ran this.
    max_date_last_run = self.last_job.get('max_date_last_run', None)
    if max_date_last_run:
        self.max_date_last_run = dates.parse_iso(max_date_last_run)
    else:
        self.max_date_last_run = None
    self.log.info('Got max date: {}'.format(self.max_date_last_run))
    self.publish_dates = []

def valid_datetime(self, key, opt):
    """ Validate an iso-datetime option. """
    v_opt = dates.parse_iso(opt)
    if not v_opt:
        raise RecipeSchemaError(
            "{} should be a 'datetime' field but was passed '{}'."
            .format(key, opt))
    return v_opt

def arg_date(name, default=None):
    """ Fetch a query argument as a datetime object. """
    v = request.args.get(name, '')
    if not len(v):
        return default
    v = dates.parse_iso(v)
    if not v:
        raise RequestError(
            'Invalid value for "{}". '
            'It should be an iso8601 datetime string.'.format(name))
    return v

def arg_list(name, default=None, typ=str, exclusions=False):
    """ Get a comma-separated list of args, asserting a type.
    Includes the ability to parse out exclusions via a '!' or '-' prefix. """
    type_string = str(typ).split("'")[1]
    include_values = []
    exclude_values = []
    vv = arg_str(name, default=None)
    if not vv:
        if exclusions:
            return default, default
        return default
    for i, value in enumerate(vv.split(','), start=1):
        if value.startswith('!') or value.startswith('-'):
            value = value[1:]
            exclude = True
        else:
            exclude = False
        try:
            if 'date' in type_string:
                v = dates.parse_iso(value)
                if not v:
                    raise ValueError
            else:
                v = typ(value)
        except (TypeError, ValueError):
            raise RequestError(
                '"{}", element {} of "{}" is invalid. '
                'It should be a {} type.'.format(value, i, name, type_string))
        if exclusions:
            if exclude:
                exclude_values.append(v)
            else:
                include_values.append(v)
        else:
            include_values.append(v)
    if exclusions:
        return include_values, exclude_values
    return include_values

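# Usage sketch for arg_list. `arg_str` reads from flask's request.args, so
# this assumes an active request context; the query string is hypothetical:
#
#   GET /items?ids=1,2,!3
#
#   include, exclude = arg_list('ids', default=[], typ=int, exclusions=True)
#   # include -> [1, 2]
#   # exclude -> [3]
#
# With exclusions=False the '!'/'-' prefixes are still stripped, but every
# value lands in the single returned list.
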
def prepare_date(o, field):
    """ Prepare a date """
    if field not in o:
        return None
    if o[field] is None:
        return None
    dt = dates.parse_iso(o[field])
    if not dt:
        raise RequestError(
            '{}: {} is an invalid date.'.format(field, o[field]))
    return dt

def get(self, *args, **kw):
    """ The main get/cache function. """
    # get a custom ttl, fallback on default
    ttl = kw.pop('ttl', self.ttl)
    # format the key
    key = self.format_key(*args, **kw)
    # last modified key
    lm_key = "{}:last_modified".format(key)
    # attempt to get the object from redis
    if not self.debug:
        obj = self.redis.get(key)
    else:
        obj = None
    # if it doesn't exist, proceed with work
    if not obj:
        # not cached
        is_cached = False
        obj = self.work(*args, **kw)
        # if the worker returns None, break out
        if not obj:
            return CacheResponse(key, obj, None, False)
        # set the object in redis at the specified
        # key with the specified ttl
        self.redis.set(key, self.serialize(obj), ex=ttl)
        # set the last modified time
        last_modified = dates.now()
        self.redis.set(lm_key, last_modified.isoformat(), ex=ttl)
    else:
        # is cached
        is_cached = True
        # if it does exist, deserialize it.
        obj = self.deserialize(obj)
        # get the cached last modified time
        last_modified = dates.parse_iso(self.redis.get(lm_key))
    return CacheResponse(key, obj, last_modified, is_cached)

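# Minimal usage sketch for the caching `get` above. The base class name
# `Cache`, the hook names, and `fetch_url` are assumptions inferred from the
# attributes `get` references (redis, ttl, debug, work, format_key, ...):
#
#   class URLCache(Cache):
#
#       def format_key(self, url):
#           return 'url-cache:{}'.format(url)
#
#       def work(self, url):
#           return fetch_url(url)  # the expensive call worth caching
#
#   resp = URLCache().get('http://example.com', ttl=3600)
#
# `resp` is a CacheResponse(key, obj, last_modified, is_cached); a second
# call within the ttl should come back with is_cached set to True.
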
def _prepare_metric_date(obj):
    """ Round a metric's datetime down to a configurable unit. """
    u = settings.METRICS_MIN_DATE_UNIT
    v = settings.METRICS_MIN_DATE_VALUE
    # set current time if no time exists.
    if 'datetime' not in obj:
        return dates.floor_now(unit=u, value=v).isoformat()
    ds = obj.pop('datetime')
    dt = dates.parse_iso(ds, enforce_tz=True)
    return dates.floor(dt, unit=u, value=v).isoformat()

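# Illustration of the flooring behavior, assuming a config of
# METRICS_MIN_DATE_UNIT = 'hour' and METRICS_MIN_DATE_VALUE = 1 (these
# values are illustrative, not necessarily the real defaults):
#
#   _prepare_metric_date({'datetime': '2015-06-01T12:34:56Z'})
#   # -> an iso string floored to the hour, e.g. '2015-06-01T12:00:00+00:00'
#
#   _prepare_metric_date({})
#   # -> the current time, floored the same way.
#
# Note the side effect: the incoming 'datetime' key is popped off `obj`.
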
def run(self):
    """ Count shares for all content items. """
    for content_item in self.api.orgs.simple_content():
        created = dates.parse_iso(content_item['created'])
        # skip anything created before the cutoff.
        if created < self.max_age:
            continue
        url = content_item.get('url')
        if url:
            data = shares.count(url)
            data.pop('url', None)
            data['content_item_id'] = content_item.get('id')
            yield data

def content_timeseries(obj,
                       org_id=None,
                       metrics_lookup=None,
                       content_item_ids=None,
                       commit=True):
    """ Ingest Timeseries Metrics for a content item. """
    # if not content_item_id or not org or not metrics_lookup:
    #     raise RequestError('Missing required kwargs.')
    content_item_id = obj.pop('content_item_id')
    if not content_item_id:
        raise RequestError('Object is missing a "content_item_id"')
    if content_item_id not in content_item_ids:
        raise RequestError(
            "Content Item with ID {} doesn't exist".format(content_item_id))
    cmd_kwargs = {'org_id': org_id, 'content_item_id': content_item_id}
    # parse datetime.
    if 'datetime' not in obj:
        cmd_kwargs['datetime'] = dates.floor_now(unit='hour', value=1).isoformat()
    else:
        ds = obj.pop('datetime')
        dt = dates.parse_iso(ds)
        cmd_kwargs['datetime'] = dates.floor(dt, unit='hour', value=1).isoformat()
    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=['content_item', 'all'],
        check_timeseries=True)
    # upsert command
    cmd = """SELECT upsert_content_metric_timeseries(
                 {org_id},
                 {content_item_id},
                 '{datetime}',
                 '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)
    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
        cmd_kwargs['metrics'] = metrics
    return cmd

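# Illustrative call (payload fields beyond 'content_item_id' and 'datetime'
# must exist in the org's metrics lookup; the names below are hypothetical):
#
#   cmd = content_timeseries(
#       {'content_item_id': 1,
#        'datetime': '2015-06-01T12:30:00Z',
#        'twitter_shares': 20},
#       org_id=1,
#       metrics_lookup=lookup,      # built elsewhere from the org's metrics
#       content_item_ids=[1],
#       commit=False)               # skip execution; just return the SQL
#
# With commit=True the upsert runs against the session immediately.
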
def org_timeseries(obj, org_id=None, metrics_lookup=None, commit=True):
    """ Ingest Timeseries Metrics for an organization. """
    cmd_kwargs = {'org_id': org_id}
    # parse datetime.
    if 'datetime' not in obj:
        cmd_kwargs['datetime'] = dates.floor_now(unit='hour', value=1).isoformat()
    else:
        ds = obj.pop('datetime')
        dt = dates.parse_iso(ds)
        cmd_kwargs['datetime'] = dates.floor(dt, unit='hour', value=1).isoformat()
    metrics = ingest_util.prepare_metrics(
        obj, metrics_lookup,
        valid_levels=['org', 'all'],
        check_timeseries=True)
    # upsert command
    cmd = """SELECT upsert_org_metric_timeseries(
                 {org_id},
                 '{datetime}',
                 '{metrics}')
          """.format(metrics=obj_to_json(metrics), **cmd_kwargs)
    if commit:
        try:
            db.session.execute(cmd)
        except Exception as err:
            raise RequestError(err.message)
        cmd_kwargs['metrics'] = metrics
        return cmd_kwargs
    return cmd

def publish_date(soup, source_url=None):
    """ Extract publish date from meta / source_url. """
    # try isodate first
    for tag in PUBLISH_DATE_TAGS:
        ds = _extract_tag_data(soup, tag)
        if ds:
            dt = dates.parse_iso(ds, enforce_tz=False)
            if dt:
                return dt
    # try a timestamp next.
    for tag in PUBLISH_DATE_TAGS:
        ds = _extract_tag_data(soup, tag)
        if ds:
            dt = dates.parse_ts(ds)
            if dt:
                return dt
    # try any date next.
    for tag in PUBLISH_DATE_TAGS:
        ds = _extract_tag_data(soup, tag)
        if ds:
            dt = dates.parse_any(ds, enforce_tz=False)
            if dt:
                return dt
    # fallback on url regex
    if source_url:
        dm = re_url_date.search(source_url)
        if dm:
            ds = dm.group(0)
            dt = dates.parse_any(ds, enforce_tz=False)
            if dt:
                return dt

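# Usage sketch: `soup` is parsed article markup (e.g. a BeautifulSoup tree),
# and the url fallback assumes `re_url_date` matches date-like path segments
# such as /2015/06/01/ (the exact pattern lives elsewhere):
#
#   dt = publish_date(soup, source_url='http://example.com/2015/06/01/story/')
#   # meta tags win when any of them parse; otherwise the url date is used,
#   # and None comes back when nothing parses at all.
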
def created(self):
    # parse the post's 'updated_time' field, if present.
    if self.post.get('updated_time'):
        return dates.parse_iso(self.post['updated_time'])
    return None

def get_status(user, job_id):
    """ Get the status of a queued job. """
    # parse args.
    queue = request.args.get('queue')
    if not queue:
        raise RequestError(
            'You must pass in the queue name to fetch a job\'s status')
    if queue not in queues:
        raise RequestError(
            '"{}" is not a valid queue.'.format(queue))
    q = queues.get(queue)
    job = q.fetch_job(job_id)
    if not job:
        raise RequestError(
            'A job with ID {} does not exist'.format(job_id))
    # fetch metadata about this job from the session.
    started = request.args.get('started')
    orig_url = request.args.get('orig_url')
    if started:
        started = dates.parse_iso(started)
    # format return value
    ret = {
        'job_id': job_id,
        'queue': queue,
        'status': None,
        'started': started,
        'orig_url': orig_url
    }
    # determine time since start
    if started:
        ret['time_since_start'] = (dates.now() - started).seconds
    # determine status
    if job.is_queued:
        ret['status'] = 'queued'
    if job.is_started:
        ret['status'] = 'running'
    if job.is_failed:
        ret['status'] = 'error'
        ret['message'] = "An unknown error occurred."
    if job.is_finished:
        rv = job.return_value
        # job will return true if successful
        if rv is True:
            ret['status'] = 'success'
        # job will return an error if unsuccessful
        else:
            ret['status'] = 'error'
            ret['message'] = str(rv.message)
    return jsonify(ret)

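# Example of the JSON this view returns while a job is still running (the
# queue name and values are illustrative, not taken from the source):
#
#   GET /status/<job_id>?queue=bulk&started=2015-06-01T12:00:00Z
#
#   {
#     "job_id": "...",
#     "queue": "bulk",
#     "status": "running",
#     "started": "2015-06-01T12:00:00+00:00",
#     "orig_url": null,
#     "time_since_start": 42
#   }
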