class Template(db.Model):

    """
    A named text template owned by an org.

    The raw template source lives in `template` and is rendered
    on demand via `render`. Slugs are unique per org.
    """

    __tablename__ = 'templates'
    __module__ = 'newslynx.models.template'

    id = db.Column(db.Integer, unique=True, index=True, primary_key=True)
    org_id = db.Column(db.Integer, db.ForeignKey('orgs.id'), index=True)
    name = db.Column(db.Text)
    slug = db.Column(db.Text, index=True)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)
    template = db.Column(db.Text)
    format = db.Column(ENUM(*TEMPLATE_FORMATS, name="template_format_enum"))

    # deleting a template also deletes the reports that hang off of it.
    reports = db.relationship(
        'Report',
        backref=db.backref('template', lazy='joined'),
        lazy='dynamic',
        cascade="all, delete-orphan")

    __table_args__ = (
        db.UniqueConstraint('org_id', 'slug'),
    )

    def __init__(self, **kw):
        """
        Populate the template from keyword arguments. When no `slug`
        is passed, one is derived from `name`.
        """
        self.org_id = kw.get('org_id')
        self.name = kw.get('name')
        # NOTE: the fallback slug is always computed, even when an
        # explicit slug has been passed in.
        fallback_slug = slug(kw.get('name'))
        self.slug = kw.get('slug', fallback_slug)
        self.template = kw.get('template')
        self.format = kw.get('format')
        # NOTE(review): there is no `data` column on this model;
        # this is only ever a plain instance attribute.
        self.data = kw.get('data')

    def to_dict(self):
        """
        Serialize the column values of this template into a dictionary.
        """
        fields = (
            'id', 'org_id', 'name', 'slug',
            'created', 'updated', 'template', 'format'
        )
        return dict((field, getattr(self, field)) for field in fields)

    def render(self, **kw):
        """
        Render this template, passing all keyword arguments through
        as the rendering context.
        """
        return Tmpl(self.template).render(**kw)

    def __repr__(self):
        return "<Template {0!r} / {1!r} >".format(self.org_id, self.slug)
class Setting(db.Model):

    """
    A named setting stored as text, scoped to an org and/or a user.

    When `json_value` is true, `value` holds a JSON string which is
    decoded again in `to_dict`. Otherwise `value` holds the `str()`
    of whatever was passed in.
    """

    __tablename__ = 'org_settings'
    __module__ = 'newslynx.models.setting'

    id = db.Column(db.Integer, unique=True, index=True, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), index=True)
    org_id = db.Column(db.Integer, db.ForeignKey('orgs.id'), index=True)
    name = db.Column(db.Text, index=True)
    value = db.Column(db.Text)
    level = db.Column(ENUM(*('orgs', 'me'), name='enum_setting_level'))
    json_value = db.Column(db.Boolean)

    __table_args__ = (
        db.UniqueConstraint('org_id', 'user_id', 'level', 'name'),
    )

    def __init__(self, **kw):
        """
        Populate the setting from keyword arguments.

        Recognized keys: `org_id`, `user_id`, `name`, `value`,
        `json_value` (default False) and `level` (default 'orgs').
        """
        self.org_id = kw.get('org_id')
        self.user_id = kw.get('user_id')
        self.name = kw.get('name')
        self.json_value = kw.get('json_value', False)
        # The default has to be a member of `enum_setting_level`
        # ('orgs' / 'me'). It used to be 'org', which the enum
        # does not contain.
        self.level = kw.get('level', 'orgs')

        if self.json_value:
            v = kw.get('value')
            # strings are assumed to already be serialized json.
            if not isinstance(v, basestring):
                v = obj_to_json(v)
            self.value = v
        else:
            self.value = str(kw.get('value'))

    def to_dict(self):
        """
        Serialize this setting, decoding `value` from json
        when `json_value` is set.
        """
        v = copy.copy(self.value)
        if self.json_value:
            v = json_to_obj(v)
        return {
            'id': self.id,
            'user_id': self.user_id,
            'org_id': self.org_id,
            'name': self.name,
            'level': self.level,
            'value': v,
            'json_value': self.json_value
        }

    def __repr__(self):
        return "<Setting %r / %r >" % (self.name, self.value)
class Recipe(db.Model):

    """
    A recipe ties a sous chef to a user and an org, along with a set
    of pickled options and optional scheduling information.
    """

    __tablename__ = 'recipes'
    __module__ = 'newslynx.models.recipe'

    # id fields
    id = db.Column(db.Integer, unique=True, index=True, primary_key=True)
    sous_chef_id = db.Column(
        db.Integer, db.ForeignKey('sous_chefs.id'), index=True)
    user_id = db.Column(
        db.Integer, db.ForeignKey('users.id'), index=True)
    org_id = db.Column(
        db.Integer, db.ForeignKey('orgs.id'), index=True)

    # core fields
    name = db.Column(db.Text, index=True)
    slug = db.Column(db.Text, index=True)
    description = db.Column(db.Text)

    # date fields
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), default=dates.now, onupdate=dates.now)
    last_run = db.Column(db.DateTime(timezone=True), index=True)

    # scheduler fields
    schedule_by = db.Column(
        ENUM(*RECIPE_SCHEDULE_TYPES, name="recipe_schedule_type_enum"),
        index=True)
    crontab = db.Column(db.Text)
    time_of_day = db.Column(db.Text)
    minutes = db.Column(db.Integer)
    status = db.Column(
        ENUM(*RECIPE_STATUSES, name="enum_recipe_statuses"), index=True)
    traceback = db.Column(db.Text)
    last_job = db.Column(JSON)

    # options
    options = db.Column(db.Text)
    options_hash = db.Column(db.Text)

    # relations
    events = db.relationship('Event', lazy='dynamic')
    content_items = db.relationship('ContentItem', lazy='dynamic')
    metrics = db.relationship(
        'Metric',
        backref=db.backref('recipe', lazy='joined'),
        lazy='joined')
    sous_chef = db.relationship(
        'SousChef',
        backref=db.backref(
            'recipes', lazy='joined', cascade="all, delete-orphan"),
        lazy='joined')
    user = db.relationship(
        'User',
        backref=db.backref('recipes', lazy='dynamic'),
        lazy='joined')

    __table_args__ = (
        db.UniqueConstraint('org_id', 'name'),
    )

    def __init__(self, sous_chef, **kw):
        """
        A recipe must be initialized with an existing sous chef.

        `name` is required: it is the fallback source for the slug
        and a missing name raises a KeyError.
        """
        # core fields
        self.name = kw.get('name')
        raw_slug = kw.get('slug', kw['name'])
        self.slug = slug(raw_slug)
        self.description = kw.get('description')
        self.schedule_by = kw.get('schedule_by', 'unscheduled')
        self.crontab = kw.get('crontab')
        self.time_of_day = kw.get('time_of_day')
        self.minutes = kw.get('minutes')
        self.status = kw.get('status', 'stable')
        self.traceback = kw.get('traceback')
        self.set_options(kw.get('options', {}))

        # internal fields
        self.sous_chef_id = sous_chef.id
        self.user_id = kw.get('user_id')
        self.org_id = kw.get('org_id')
        self.last_run = kw.get('last_run', None)
        self.last_job = kw.get('last_job', {})

    def set_options(self, opts):
        """
        Store the options as a pickle alongside an md5 hash
        of that pickle.
        """
        pickled = obj_to_pickle(opts)
        digest = md5(pickled).hexdigest()
        self.options = pickled
        self.options_hash = str(digest)

    @property
    def scheduled(self):
        """
        Is this recipe scheduled?
        """
        return not (self.schedule_by == 'unscheduled')

    @property
    def active(self):
        """
        Is this recipe active (i.e. its status is not 'inactive')?
        """
        return not (self.status == 'inactive')

    @property
    def metric_names(self):
        """
        The names of all metrics associated with this recipe.
        """
        names = []
        for metric in self.metrics:
            names.append(metric.name)
        return names

    @property
    def report_names(self):
        """
        The slugs of all reports associated with this recipe.
        """
        # NOTE(review): `reports` is not declared on this class;
        # presumably it is a backref defined on the Report model.
        names = []
        for report in self.reports:
            names.append(report.slug)
        return names

    def to_dict(self, **kw):
        """
        Serialize this recipe. Metric names and (unless
        `incl_reports` is false) report names are only included
        when the sous chef declares that it creates them.
        """
        incl_reports = kw.get("incl_reports", True)
        d = {
            'id': self.id,
            'org_id': self.org_id,
            'sous_chef': self.sous_chef.slug,
            'name': self.name,
            'slug': self.slug,
            'description': self.description,
            'created': self.created,
            'updated': self.updated,
            'last_run': self.last_run,
            'schedule_by': self.schedule_by,
            'crontab': self.crontab,
            'time_of_day': self.time_of_day,
            'minutes': self.minutes,
            'status': self.status,
            'traceback': self.traceback,
            'last_job': self.last_job,
            'options': pickle_to_obj(self.options)
        }

        if 'metrics' in self.sous_chef.creates:
            d['metrics'] = self.metric_names

        if incl_reports and 'report' in self.sous_chef.creates:
            d['reports'] = self.report_names

        return d

    def __repr__(self):
        return '<Recipe %r >' % (self.slug)
class ContentItem(db.Model):

    """
    A content-item is a unit of content to which we attach metrics.

    We do not initialize a content-item until we have passed it completely
    through our single ingestion pipeline.

    At this point all content-items should have a standardized schema,
    though they may not have all of these fields filled in.
    """

    query_class = SearchQuery

    __tablename__ = 'content'
    __module__ = 'newslynx.models.content_item'

    # NOTE(review): the original comment here read "the ID is the global
    # bitly hash", but the column is a plain integer primary key -- confirm.
    id = db.Column(db.Integer, unique=True, primary_key=True, index=True)
    org_id = db.Column(
        db.Integer, db.ForeignKey('orgs.id'), index=True)
    recipe_id = db.Column(db.Integer, db.ForeignKey('recipes.id'), index=True)
    type = db.Column(ENUM(*CONTENT_ITEM_TYPES, name='content_item_types_enum'))
    provenance = db.Column(
        ENUM(*CONTENT_ITEM_PROVENANCES, name='content_item_provenance_enum'),
        index=True)
    url = db.Column(db.Text, index=True)
    domain = db.Column(db.Text, index=True)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)
    site_name = db.Column(db.Text, index=True)
    favicon = db.Column(db.Text)
    img_url = db.Column(db.Text)
    thumbnail = db.Column(db.Text)
    title = db.Column(db.Text)
    description = db.Column(db.Text)
    body = db.Column(db.Text)
    active = db.Column(db.Boolean, index=True)
    meta = db.Column(JSON)

    # relations
    tags = db.relationship(
        'Tag',
        secondary=relations.content_items_tags,
        backref=db.backref('content_items', lazy='dynamic'),
        lazy='joined')

    events = db.relationship(
        'Event',
        secondary=relations.content_items_events,
        backref=db.backref('content_items', lazy='dynamic'),
        lazy='dynamic')

    authors = db.relationship(
        'Author',
        secondary=relations.content_items_authors,
        backref=db.backref('content_items', lazy='dynamic'),
        lazy='joined')

    # at most one summary row per content item (uselist=False).
    summary_metrics = db.relationship(
        'ContentMetricSummary',
        lazy='joined',
        uselist=False,
        cascade="all, delete-orphan")

    timeseries_metrics = db.relationship(
        'ContentMetricTimeseries',
        lazy='dynamic',
        cascade="all, delete-orphan")

    # # in/out links
    # out_links = db.relationship(
    #     'ContentItem', secondary=relations.content_items_content_items,
    #     primaryjoin=relations.content_items_content_items.c.from_content_item_id == id,
    #     secondaryjoin=relations.content_items_content_items.c.to_content_item_id == id,
    #     backref=db.backref("in_links", lazy='dynamic'),
    #     lazy='dynamic')

    # search vectors
    title_search_vector = db.Column(TSVectorType('title'))
    body_search_vector = db.Column(TSVectorType('body'))
    description_search_vector = db.Column(TSVectorType('description'))
    meta_search_vector = db.Column(TSVectorType('meta'))

    # content_items should be unique to org, url, and type.
    # IE there might be multiple content_items per url -
    # an article, a video, a podcast, etc.
    __table_args__ = (
        db.UniqueConstraint(
            'org_id', 'url', 'type', name='content_item_unique_constraint'),
        Index('content_item_title_search_vector_idx',
              'title_search_vector', postgresql_using='gin'),
        Index('content_item_body_search_vector_idx',
              'body_search_vector', postgresql_using='gin'),
        Index('content_item_description_search_vector_idx',
              'description_search_vector', postgresql_using='gin'),
        Index('content_item_meta_search_vector_idx',
              'meta_search_vector', postgresql_using='gin')
    )

    def __init__(self, **kw):
        """
        Populate the content item from keyword arguments.

        Defaults: `provenance` is 'recipe', `created` is the current
        time, `active` is True and `meta` is an empty dict. Every
        other field defaults to None.
        """
        # fields which simply fall back to None.
        optional_fields = (
            'org_id', 'recipe_id', 'url', 'type', 'domain',
            'site_name', 'favicon', 'img_url', 'thumbnail',
            'title', 'description', 'body'
        )
        for field in optional_fields:
            setattr(self, field, kw.get(field))

        # fields with explicit defaults.
        self.provenance = kw.get('provenance', 'recipe')
        self.created = kw.get('created', dates.now())
        self.active = kw.get('active', True)
        self.meta = kw.get('meta', {})

    @property
    def simple_authors(self):
        """
        A list of {id, name} dictionaries, one per author.
        """
        simplified = []
        for author in self.authors:
            simplified.append({"id": author.id, "name": author.name})
        return simplified

    @property
    def author_ids(self):
        """
        The ids of this content item's authors.
        """
        return [author.id for author in self.authors]

    # @property
    # def out_link_ids(self):
    #     out_links = db.session.query(relations.content_items_content_items.c.to_content_item_id)\
    #         .filter(relations.content_items_content_items.c.from_content_item_id == self.id)\
    #         .all()
    #     return [o[0] for o in out_links]

    # @property
    # def in_link_ids(self):
    #     in_links = db.session.query(relations.content_items_content_items.c.from_content_item_id)\
    #         .filter(relations.content_items_content_items.c.to_content_item_id == self.id)\
    #         .all()
    #     return [o[0] for o in in_links]

    # @property
    # def out_link_display(self):
    #     out_links = self.out_links\
    #         .with_entities(ContentItem.id, ContentItem.title)\
    #         .all()
    #     return [dict(zip(['id', 'title'], l)) for l in out_links]

    # @property
    # def in_link_display(self):
    #     in_links = self.in_links\
    #         .with_entities(ContentItem.id, ContentItem.title)\
    #         .all()
    #     return [dict(zip(['id', 'title'], l)) for l in in_links]

    @property
    def tag_ids(self):
        """
        The ids of all tags attached to this content item.
        """
        return [tag.id for tag in self.tags]

    @property
    def subject_tag_ids(self):
        """
        The ids of this content item's tags of type 'subject'.
        """
        return [tag.id for tag in self.tags if tag.type == 'subject']

    @property
    def impact_tag_ids(self):
        """
        The ids of the 'impact' tags on this content item's events.
        An id appears once for every event that carries the tag.
        """
        ids = []
        for event in self.events:
            for tag in event.tags:
                if tag.type == 'impact':
                    ids.append(tag.id)
        return ids

    @property
    def event_ids(self):
        """
        The ids of all events associated with this content item.
        """
        return [event.id for event in self.events]

    def to_dict(self, **kw):
        """
        Serialize this content item.

        Keyword flags: `incl_body` (default False), `incl_metrics`
        (default True) and `incl_img` (default False) toggle the
        optional 'body', 'metrics' and 'thumbnail' / 'img_url' keys.
        """
        # incl_links = kw.get('incl_links', False)
        incl_body = kw.get('incl_body', False)
        incl_metrics = kw.get('incl_metrics', True)
        incl_img = kw.get('incl_img', False)

        d = {
            'id': self.id,
            'org_id': self.org_id,
            'recipe_id': self.recipe_id,
            'url': self.url,
            'domain': self.domain,
            'provenance': self.provenance,
            'type': self.type,
            'created': self.created,
            'updated': self.updated,
            'favicon': self.favicon,
            'site_name': self.site_name,
            'authors': self.simple_authors,
            'title': self.title,
            'description': self.description,
            'subject_tag_ids': self.subject_tag_ids,
            'impact_tag_ids': self.impact_tag_ids,
            'active': self.active,
            'meta': self.meta
        }

        # if incl_links:
        #     d['in_links'] = self.in_link_display
        #     d['out_links'] = self.out_link_display

        if incl_body:
            d['body'] = self.body

        if incl_metrics:
            # fall back to an empty dict when no summary exists yet.
            d['metrics'] = \
                self.summary_metrics.metrics if self.summary_metrics else {}

        if incl_img:
            d['thumbnail'] = self.thumbnail
            d['img_url'] = self.img_url

        return d

    def __repr__(self):
        return '<ContentItem {0!r} / {1!r} >'.format(self.url, self.type)
class Metric(db.Model):

    """
    A metric is a concept created by a recipe and (ultimately)
    associated with an org or an org and a content-item.

    Metrics are primarily created in SousChef configurations.
    For instance, the Google Analytics Sous Chef will specify
    metadata about pageviews / time on page / entrances / exits / etc.

    When a recipe associated with this sous chef is created, records
    for each of these metrics will be inserted into this table.

    ## TODO:
    Computed metrics are formulas of existing metrics.
    For a computed metric to be valid its associated metrics
    must exist on the same level. You cannot compute metrics
    from computed metrics.
    """

    __tablename__ = 'metrics'
    __module__ = 'newslynx.models.metric'

    id = db.Column(db.Integer, unique=True, primary_key=True, index=True)
    org_id = db.Column(
        db.Integer, db.ForeignKey('orgs.id'), index=True)
    recipe_id = db.Column(db.Integer, db.ForeignKey('recipes.id'), index=True)
    name = db.Column(db.Text, index=True)
    display_name = db.Column(db.Text)
    description = db.Column(db.Text)
    type = db.Column(ENUM(*METRIC_TYPES, name='metric_types_enum'), index=True)
    agg = db.Column(ENUM(*METRIC_AGGS, name='metric_aggs_enum'), index=True)
    content_levels = db.Column(ARRAY(db.Text), index=True)
    org_levels = db.Column(ARRAY(db.Text), index=True)
    faceted = db.Column(db.Boolean, index=True, default=False)
    # NOTE: a `computed` Boolean column used to be declared here. It was
    # rebound by the `computed` property further down in this class body,
    # so it was never part of the mapped table. `computed` is derived
    # from `type` instead; the dead declaration has been removed.
    formula = db.Column(db.Text)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)

    __table_args__ = (
        db.UniqueConstraint('org_id', 'name', 'type'),
    )

    def __init__(self, **kw):
        """
        Populate the metric from keyword arguments.

        When `agg` is not passed it is looked up from `type` via
        `TYPE_TO_AGG_FX`. `content_levels` and `org_levels` default
        to empty lists and `faceted` defaults to False.
        """
        self.org_id = kw.get('org_id')
        self.recipe_id = kw.get('recipe_id')
        self.name = kw.get('name')
        self.description = kw.get('description')
        self.display_name = kw.get('display_name')
        self.type = kw.get('type')
        self.agg = kw.get('agg', TYPE_TO_AGG_FX.get(kw.get('type')))
        self.content_levels = kw.get('content_levels', [])
        self.org_levels = kw.get('org_levels', [])
        self.faceted = kw.get('faceted', False)
        self.formula = kw.get('formula')

    @property
    def computed(self):
        """
        Is this a computed metric (i.e. is its type 'computed')?
        """
        return self.type == 'computed'

    @property
    def formula_requires(self):
        """
        The metrics required by this metric's formula, as reported
        by `computed_metric_schema.required_metrics`.
        """
        return computed_metric_schema.required_metrics(self.formula)

    def to_dict(self):
        """
        Serialize this metric. The formula and the metrics it requires
        are only included for computed metrics.
        """
        d = {
            'id': self.id,
            'org_id': self.org_id,
            'recipe_id': self.recipe_id,
            'name': self.name,
            'display_name': self.display_name,
            'description': self.description,
            'type': self.type,
            'agg': self.agg,
            'content_levels': self.content_levels,
            'org_levels': self.org_levels,
            'faceted': self.faceted,
            'created': self.created,
            'updated': self.updated
        }
        if self.computed:
            d['formula'] = self.formula
            d['formula_requires'] = self.formula_requires
        return d

    def __repr__(self):
        return '<Metric %r >' % (self.name)
class Author(db.Model):

    """
    An author of a content-item.
    A unique author is a combination of an org_id and a name.
    """

    __tablename__ = 'authors'
    __module__ = 'newslynx.models.author'

    query_class = SearchQuery

    # NOTE(review): the original comment here read "the ID is the global
    # bitly hash", but the column is a plain integer primary key -- confirm.
    id = db.Column(db.Integer, unique=True, primary_key=True, index=True)
    org_id = db.Column(db.Integer, db.ForeignKey('orgs.id'), index=True)
    name = db.Column(db.Text, index=True)
    img_url = db.Column(db.Text)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)

    __table_args__ = (
        db.UniqueConstraint('org_id', 'name'),
    )

    # our search vector
    search_vector = db.Column(TSVectorType('name'))

    def __init__(self, **kw):
        """
        Populate the author from keyword arguments.

        `name` is required and is stored upper-cased; it is
        title-cased again on the way out in `to_dict`.
        """
        self.org_id = kw.get('org_id')
        self.name = kw.get('name').upper()
        self.img_url = kw.get('img_url')

    def fetch_content_items(self):
        """
        Fetch all of this author's content items, newest first.
        """
        return self.content_items\
            .order_by("content.created desc")\
            .all()

    @property
    def content_item_ids(self):
        """
        The ids of all of this author's content items.
        """
        return [c.id for c in self.content_items]

    def simple_content(self, **kw):
        """
        A simplified representation of this author's content items.

        Pass `incl_metrics=True` to include each item's summary
        metrics; items without a summary get an empty dict.
        """
        incl_metrics = kw.get('incl_metrics', False)
        output = []
        for c in self.fetch_content_items():
            d = {
                'id': c.id,
                'title': c.title,
                'description': c.description,
                'created': c.created,
                'img_url': c.img_url
            }
            if incl_metrics:
                # The relationship on ContentItem is named
                # `summary_metrics` (not `summary_metric`) and is None
                # when no summary row exists, so mirror the fallback
                # used by ContentItem.to_dict.
                summary = c.summary_metrics
                d['metrics'] = summary.metrics if summary else {}
            output.append(d)
        return output

    def to_dict(self, **kw):
        """
        Serialize this author. Pass `incl_content=True` to embed the
        output of `simple_content`, which receives the same keyword
        arguments.
        """
        incl_content = kw.get('incl_content', False)
        d = {
            'id': self.id,
            'org_id': self.org_id,
            'name': self.name.title(),
            'img_url': self.img_url,
            'created': self.created,
            'updated': self.updated,
        }
        if incl_content:
            d['content_items'] = self.simple_content(**kw)
        return d

    def __repr__(self):
        return '<Author %r >' % (self.name)
class Tag(db.Model):

    """
    A tag is an arbitrary label which we can assign
    to a content-item or an event.
    """

    __tablename__ = 'tags'
    __module__ = 'newslynx.models.tag'

    id = db.Column(db.Integer, primary_key=True)
    org_id = db.Column(
        db.Integer, db.ForeignKey('orgs.id'), index=True)
    name = db.Column(db.Text)
    slug = db.Column(db.Text, index=True)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)
    color = db.Column(db.Text)
    type = db.Column(ENUM(*TAG_TYPES, name='tag_type_enum'))
    category = db.Column(
        ENUM(*IMPACT_TAG_CATEGORIES, name='tag_categories_enum'))
    level = db.Column(ENUM(*IMPACT_TAG_LEVELS, name='tag_levels_enum'))

    __table_args__ = (
        db.UniqueConstraint('org_id', 'slug', 'type'),
    )

    def __init__(self, **kw):
        """
        Populate the tag from keyword arguments.

        `name` is required: it is the fallback source for the slug
        and a missing name raises a KeyError.
        """
        self.org_id = kw.get('org_id')
        self.name = kw.get('name')
        self.slug = slugify(kw.get('slug', kw['name']))
        self.type = kw.get('type')
        self.color = kw.get('color')
        self.category = kw.get('category')
        self.level = kw.get('level')

    def to_dict(self):
        """
        Serialize this tag. `category` and `level` are only
        included for tags of type 'impact'.
        """
        # `color` is optional (it defaults to None in __init__), so
        # only lower-case it when it has actually been set.
        color = self.color
        if color is not None:
            color = color.lower()

        d = {
            'id': self.id,
            'org_id': self.org_id,
            'name': self.name,
            'slug': self.slug,
            'type': self.type,
            'color': color,
            'created': self.created,
            'updated': self.updated
        }
        if self.type == 'impact':
            d['category'] = self.category
            d['level'] = self.level
        return d

    def __repr__(self):
        return '<Tag %r / %r >' % (self.name, self.type)
class Event(db.Model):

    """
    An event is a significant moment in the life of a thing / org.
    """

    query_class = SearchQuery

    __tablename__ = 'events'
    __module__ = 'newslynx.models.event'

    id = db.Column(db.Integer, unique=True, primary_key=True, index=True)
    # the unique id from the source.
    source_id = db.Column(db.Text, index=True)
    org_id = db.Column(
        db.Integer, db.ForeignKey('orgs.id'), index=True)
    recipe_id = db.Column(db.Integer, db.ForeignKey('recipes.id'), index=True)
    status = db.Column(
        ENUM(*EVENT_STATUSES, name='event_status_enum'), index=True)
    provenance = db.Column(
        ENUM(*EVENT_PROVENANCES, name='event_provenance_enum'), index=True)
    url = db.Column(db.Text, index=True)
    domain = db.Column(db.Text, index=True)
    img_url = db.Column(db.Text)
    thumbnail = db.Column(db.Text)
    created = db.Column(db.DateTime(timezone=True), default=dates.now)
    updated = db.Column(
        db.DateTime(timezone=True), onupdate=dates.now, default=dates.now)
    title = db.Column(db.Text)
    description = db.Column(db.Text)
    body = db.Column(db.Text)
    authors = db.Column(ARRAY(String))
    meta = db.Column(JSON)

    # search vectors
    title_search_vector = db.Column(TSVectorType('title'))
    description_search_vector = db.Column(TSVectorType('description'))
    body_search_vector = db.Column(TSVectorType('body'))
    authors_search_vector = db.Column(TSVectorType('authors'))
    meta_search_vector = db.Column(TSVectorType('meta'))

    # relations
    tags = db.relationship(
        'Tag',
        secondary=relations.events_tags,
        backref=db.backref('events', lazy='dynamic'),
        lazy='joined')

    # constraints and indexes
    __table_args__ = (
        db.UniqueConstraint(
            'source_id', 'org_id', name='event_unique_constraint'),
        Index('events_title_search_vector_idx',
              'title_search_vector', postgresql_using='gin'),
        Index('events_description_search_vector_idx',
              'description_search_vector', postgresql_using='gin'),
        Index('events_body_search_vector_idx',
              'body_search_vector', postgresql_using='gin'),
        Index('events_authors_search_vector_idx',
              'authors_search_vector', postgresql_using='gin'),
        Index('events_meta_search_vector_idx',
              'meta_search_vector', postgresql_using='gin')
    )

    def __init__(self, **kw):
        """
        Populate the event from keyword arguments.

        Defaults: `status` is 'pending', `provenance` is 'recipe',
        `created` is the current time, `authors` is an empty list and
        `meta` is an empty dict. When no `domain` is passed it is
        derived from `url`.
        """
        # the source id is always stored as a string.
        self.source_id = str(kw.get('source_id'))
        self.recipe_id = kw.get('recipe_id')
        self.org_id = kw.get('org_id')
        self.status = kw.get('status', 'pending')
        self.provenance = kw.get('provenance', 'recipe')

        link = kw.get('url')
        self.url = link
        # NOTE: the fallback domain is always computed, even when an
        # explicit domain has been passed in.
        fallback_domain = url.get_domain(link)
        self.domain = kw.get('domain', fallback_domain)

        self.img_url = kw.get('img_url')
        self.thumbnail = kw.get('thumbnail')
        self.created = kw.get('created', dates.now())
        self.title = kw.get('title')
        self.description = kw.get('description')
        self.body = kw.get('body')
        self.authors = kw.get('authors', [])
        self.meta = kw.get('meta', {})

    @property
    def simple_content_items(self):
        """
        A list of {id, title, url} dictionaries, one per
        associated content item.
        """
        return [
            {'id': item.id, 'title': item.title, 'url': item.url}
            for item in self.content_items
        ]

    @property
    def content_item_ids(self):
        """
        The ids of all content items associated with this event.
        """
        return [item.id for item in self.content_items]

    @property
    def tag_ids(self):
        """
        The ids of all tags attached to this event.
        """
        return [tag.id for tag in self.tags]

    @property
    def tag_count(self):
        """
        The number of tags attached to this event.
        """
        return len(self.tags)

    def to_dict(self, **kw):
        """
        Serialize this event.

        Keyword flags: `incl_body` and `incl_img` (both default False)
        toggle the optional 'body' and 'thumbnail' / 'img_url' keys.
        """
        incl_body = kw.get('incl_body', False)
        incl_img = kw.get('incl_img', False)

        d = {
            'id': self.id,
            'recipe_id': self.recipe_id,
            'source_id': self.source_id,
            'status': self.status,
            'provenance': self.provenance,
            'url': self.url,
            'created': self.created,
            'updated': self.updated,
            'title': self.title,
            'description': self.description,
            'authors': self.authors,
            'meta': self.meta,
            'tag_ids': self.tag_ids,
            'content_items': self.simple_content_items,
        }

        if incl_body:
            d['body'] = self.body

        if incl_img:
            d['thumbnail'] = self.thumbnail
            d['img_url'] = self.img_url

        return d

    def __repr__(self):
        return '<Event %r>' % (self.title)