class Grant(db.Model):
    """
    A grant tying a user and a client to a code, with an optional
    redirect URI, expiry time and set of scopes.
    """
    id = db.Column(db.Integer, primary_key=True)

    # The foreign key is declared with ON DELETE CASCADE.
    user_id = db.Column(db.ForeignKey('user.id', ondelete='CASCADE'))
    user = db.relationship('User')

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    code = db.Column(db.String(255), index=True, nullable=False)
    redirect_uri = db.Column(db.String(255))
    expires = db.Column(db.DateTime)

    # Whitespace-separated scope names; read them through `scopes`.
    _scopes = db.Column(db.Text)

    def delete(self):
        """
        Delete this grant and commit the session.

        Returns:
            | Grant -- this instance
        """
        db.session.delete(self)
        # The parentheses matter: a bare `db.session.commit` only looks
        # the method up and never actually commits the deletion.
        db.session.commit()
        return self

    @property
    def scopes(self):
        """
        The grant's scopes as a list of strings (empty if none are set).
        """
        if self._scopes:
            return self._scopes.split()
        return []
class ConceptConceptAssociation(BaseConceptAssociation):
    """
    Association from one concept (`from_concept_slug`) to another
    concept (`concept_slug`). Both slug columns are primary keys.
    """
    # Slug of the concept the association originates from.
    from_concept_slug = db.Column(db.String, db.ForeignKey('concept.slug', ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)

    # Slug of the concept being associated to.
    concept_slug = db.Column(db.String, db.ForeignKey('concept.slug', ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)

    # Two foreign keys reference `concept.slug`, so the relationship
    # states explicitly which one it uses.
    concept = db.relationship('Concept', backref=db.backref('from_concept_associations'), foreign_keys=[concept_slug])
class EventConceptAssociation(BaseConceptAssociation):
    """
    Association between an event and a concept.
    """
    # Backref name consumed by the base association model.
    # NOTE(review): presumably this becomes an attribute on Concept -- confirm
    # against BaseConceptAssociation.
    __backref__ = 'event_associations'

    # Primary-key column referencing `event.id`; the foreign key cascades
    # on delete and on update.
    event_id = db.Column(db.Integer, db.ForeignKey('event.id', ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)
class StoryConceptAssociation(BaseConceptAssociation):
    """
    Association between a story and a concept.
    """
    # Backref name consumed by the base association model.
    # NOTE(review): presumably this becomes an attribute on Concept -- confirm
    # against BaseConceptAssociation.
    __backref__ = 'story_associations'

    # Primary-key column referencing `story.id`; the foreign key cascades
    # on delete and on update.
    story_id = db.Column(db.Integer, db.ForeignKey('story.id', ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)
class ArticleConceptAssociation(BaseConceptAssociation):
    """
    Association between an article and a concept.
    """
    # Backref name consumed by the base association model.
    # NOTE(review): presumably this becomes an attribute on Concept -- confirm
    # against BaseConceptAssociation.
    __backref__ = 'article_associations'

    # Primary-key column referencing `article.id`; the foreign key cascades
    # on delete and on update.
    article_id = db.Column(db.Integer, db.ForeignKey('article.id', ondelete='CASCADE', onupdate='CASCADE'), primary_key=True)
class Client(db.Model):
    """
    A client application belonging to a user, together with the redirect
    URIs, default scopes and grant types it is allowed to use.
    """
    client_id = db.Column(db.String(40), primary_key=True)
    client_secret = db.Column(db.String(55), unique=True, index=True, nullable=False)

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    name = db.Column(db.String(40))
    desc = db.Column(db.String(400))

    is_confidential = db.Column(db.Boolean)

    # The three text columns below hold whitespace-separated lists;
    # read them through the matching properties.
    _redirect_uris = db.Column(db.Text)
    _default_scopes = db.Column(db.Text)
    _allowed_grant_types = db.Column(db.Text)

    def validate_scopes(self, scopes):
        """
        Check that every requested scope is a known scope.

        Args:
            | scopes (iterable) -- the scope names to check

        Returns:
            | bool -- True if all scopes are in VALID_SCOPES

        Raises:
            | InvalidScope -- on the first scope not in VALID_SCOPES
        """
        for scope in scopes:
            if scope not in VALID_SCOPES:
                raise InvalidScope('Invalid scope.')
        return True

    def validate_grant_type(self, grant_type):
        """
        Check that this client is allowed to use the given grant type.

        Args:
            | grant_type (str) -- the grant type to check

        Returns:
            | bool -- True if the grant type is allowed

        Raises:
            | InvalidGrantType -- if it is not in `allowed_grant_types`
        """
        if grant_type not in self.allowed_grant_types:
            raise InvalidGrantType('Invalid or missing grant type.')
        return True

    @property
    def client_type(self):
        """
        'confidential' if `is_confidential` is truthy, otherwise 'public'.
        """
        if self.is_confidential:
            return 'confidential'
        return 'public'

    @property
    def redirect_uris(self):
        """
        The client's redirect URIs as a list (empty if none are set).
        """
        if self._redirect_uris:
            return self._redirect_uris.split()
        return []

    @property
    def default_redirect_uri(self):
        """
        The first redirect URI, or None if the client has none.
        """
        # Indexing [0] unconditionally raised IndexError for a client
        # without any redirect URIs.
        uris = self.redirect_uris
        return uris[0] if uris else None

    @property
    def default_scopes(self):
        """
        The client's default scopes as a list (empty if none are set).
        """
        if self._default_scopes:
            return self._default_scopes.split()
        return []

    @property
    def allowed_grant_types(self):
        """
        The grant types this client may use, as a list (empty if none are set).
        """
        if self._allowed_grant_types:
            return self._allowed_grant_types.split()
        return []
class Alias(Model):
    """
    An alias (i.e. a name) for a concept.
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.UnicodeText)

    # Slug of the concept this alias belongs to; the foreign key
    # cascades on delete and on update.
    slug = db.Column(
        db.String,
        db.ForeignKey('concept.slug', ondelete='CASCADE', onupdate='CASCADE'))

    def __init__(self, name):
        """
        Create an alias with the given name.

        Only `name` is set here; `slug` is left unset by the constructor.

        Args:
            | name (str) -- the alias text
        """
        self.name = name
class Token(db.Model):
    """
    An access/refresh token pair issued to a client, optionally on
    behalf of a user.
    """
    id = db.Column(db.Integer, primary_key=True)

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    # Currently OAuthLib only supports bearer tokens.
    token_type = db.Column(db.String(40))

    access_token = db.Column(db.String(255), unique=True)
    refresh_token = db.Column(db.String(255), unique=True)
    expires = db.Column(db.DateTime)

    # Whitespace-separated scope names; read them through `scopes`.
    _scopes = db.Column(db.Text)

    @property
    def scopes(self):
        """
        The token's scopes as a list of strings (empty if none are set).
        """
        raw = self._scopes
        return raw.split() if raw else []
class Feed(Model):
    """
    A particular feed for a source,
    from which articles can be collected.
    """
    id = db.Column(db.Integer, primary_key=True)

    # The feed's URL; unique across feeds.
    ext_url = db.Column(db.Unicode, unique=True)

    # Integer counter starting at 0.
    # NOTE(review): presumably counts failed collection attempts -- confirm.
    errors = db.Column(db.Integer, default=0)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # NOTE(review): only a creation-time default is configured (no
    # `onupdate`), so presumably callers refresh this manually -- confirm.
    updated_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Boolean flag, False by default.
    updating = db.Column(db.Boolean, default=False)

    # Dynamic (query-returning) relationship; each Article gets a `feed` backref.
    articles = db.relationship('Article', backref='feed', lazy='dynamic')

    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
from argos.datastore import db from argos.core.models import Entity from argos.core.models.cluster import Cluster from argos.core.brain.cluster import cluster from argos.core.brain.summarize import multisummarize from argos.util.logger import logger stories_events = db.Table('stories_events', db.Column('story_id', db.Integer, db.ForeignKey('story.id'), primary_key=True), db.Column('event_id', db.Integer, db.ForeignKey('event.id'), primary_key=True) ) stories_entities = db.Table('stories_entities', db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')), db.Column('story_id', db.Integer, db.ForeignKey('story.id')) ) class Story(Cluster): __tablename__ = 'story' __members__ = {'class_name': 'Event', 'secondary': stories_events, 'backref_name': 'stories'} __entities__ = {'secondary': stories_entities, 'backref_name': 'stories'} @property def events(self): """ Convenience :) """ return self.members @events.setter
from argos.datastore import db from argos.core.models.cluster import Cluster from argos.core.brain.cluster import cluster from argos.core.brain.summarize import summarize, multisummarize from argos.util.logger import logger from datetime import datetime events_articles = db.Table( 'events_articles', db.Column('event_id', db.Integer, db.ForeignKey('event.id'), primary_key=True), db.Column('article_id', db.Integer, db.ForeignKey('article.id'), primary_key=True)) events_entities = db.Table( 'events_entities', db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')), db.Column('event_id', db.Integer, db.ForeignKey('event.id'))) class Event(Cluster): __tablename__ = 'event' __members__ = { 'class_name': 'Article', 'secondary': events_articles,
class Article(Clusterable):
    """
    An article, with its extracted entities and the vectors
    used to compare it against other articles.
    """
    __tablename__ = 'article'
    __entities__ = {'secondary': articles_entities, 'backref_name': 'articles'}
    vectors = db.Column(db.PickleType)
    title = db.Column(db.Unicode)
    text = db.Column(db.UnicodeText)
    html = db.Column(db.UnicodeText)
    ext_url = db.Column(db.Unicode)
    image = db.Column(db.String())
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
    authors = db.relationship(
        'Author',
        secondary=articles_authors,
        backref=db.backref('articles', lazy='dynamic'))

    def __init__(self, **kwargs):
        """
        Set each keyword argument as an attribute. If the article has
        text, extract its entities and build its vectors.
        """
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Without text there is nothing to process.
        if self.text is None:
            return

        self.entitize()
        self.vectorize()

    def vectorize(self):
        """
        Return the vectors representing this article, building and
        storing them on first use.

        The vectors are a two-item list:
        [bag of words vector, entities vector]
        """
        if self.vectors is not None:
            return self.vectors

        text_vector = vectorize(self.text)
        entity_names = entities(self.text)
        entity_vector = vectorize(' '.join(entity_names))
        self.vectors = [text_vector, entity_vector]
        return self.vectors

    def entitize(self):
        """
        Extract entities from the article text and set them
        as this article's entities.
        """
        found = []
        for entity_name in entities(self.text):
            # TO DO: Need to find a way of getting canonical name.

            # Look the entity up by its slug.
            entity = Entity.query.get(slugify(entity_name))

            # Unknown entity: create and persist it right away.
            if not entity:
                entity = Entity(entity_name)
                db.session.add(entity)
                db.session.commit()

            found.append(entity)
        self.entities = found

    def similarity(self, article):
        """
        Calculate the similarity between this article and another one.

        The result combines the text vectors, the entity vectors
        and the closeness of the two creation times.
        """
        own_vectors = self.vectorize()
        other_vectors = article.vectorize()

        # Weights for the linear combination:
        # [text vector, entity vector, publication date]
        coefs = [2, 1, 2]
        sim = 0

        for idx, own_vector in enumerate(own_vectors):
            dist = jaccard(other_vectors[idx], own_vector)

            # Two empty vectors give a jaccard distance of NaN
            # (and a warning). Treat them as having nothing in common,
            # i.e. a distance of 1.
            closeness = 0 if isnan(dist) else 1 - dist
            sim += coefs[idx] * closeness

        # Also take publication dates into account.
        ideal_time = 259200  # 3 days, in seconds
        time_diff = abs(self.created_at - article.created_at).total_seconds()

        # The score is 1 within the ideal time and approaches 0
        # the further apart the two articles are.
        if time_diff < ideal_time:
            time_score = 1
        else:
            time_score = ideal_time / time_diff
        sim += coefs[2] * time_score

        # Normalize back to [0, 1].
        return sim / sum(coefs)
class Auth(Model):
    """
    Represents a third-party authentication.
    """
    # The id is derived from the provider and provider id (see `gen_id`).
    id = db.Column(db.BigInteger(), primary_key=True)
    provider = db.Column(db.String(255))
    provider_id = db.Column(db.String(255))
    access_token = db.Column(db.String(255))

    # Stored AES-encrypted; use the `access_token_secret` property.
    _access_token_secret = db.Column('access_token_secret', db.LargeBinary(255))
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    def __init__(self, provider, provider_id, access_token, access_token_secret=None):
        """
        Args:
            | provider (str)            -- the provider name, e.g. 'twitter'
            | provider_id (str)         -- the user id assigned by the provider
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret (optional)
        """
        self.provider_id = provider_id
        self.provider = provider
        self.access_token = access_token
        self.access_token_secret = access_token_secret

        # Generate a unique id for this auth based on the provider and the provider id.
        self.id = Auth.gen_id(provider, provider_id)

    def update_token(self, access_token, access_token_secret=None):
        """
        Updates token for an authentication.

        Enforcing that access tokens and their secrets must be updated in tandem.
        May need to revisit this decision later.

        Args:
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret

        Raises:
            | Exception -- if this auth has a secret and no new secret is given
        """
        # If the auth has a token and no secret, just update the token.
        if self.access_token and self.access_token_secret is None:
            self.access_token = access_token

        # Otherwise, the auth has a token and a secret,
        # and a new secret must be present.
        elif access_token_secret is None:
            raise Exception('This authentication requires a token secret, which was not specified.')

        else:
            self.access_token = access_token
            self.access_token_secret = access_token_secret

    @property
    def access_token_secret(self):
        """
        The decrypted access token secret, or None if no secret is stored.
        """
        if self._access_token_secret is not None:
            dec = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
            return dec.decrypt(self._access_token_secret).decode('utf-8')

    @access_token_secret.setter
    def access_token_secret(self, value):
        # A value of None is ignored, leaving any stored secret untouched.
        if value is not None:
            enc = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
            self._access_token_secret = enc.encrypt(value)

    @staticmethod
    def for_provider(provider, provider_id):
        """
        Find an Auth instance by provider.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider

        Returns:
            | Auth -- the matching auth, or None if there is none
        """
        auth_id = Auth.gen_id(provider, provider_id)
        return Auth.query.get(auth_id)

    @staticmethod
    def gen_id(provider, provider_id):
        """
        Generates a unique id for an Auth.

        The id is a signed 64-bit integer taken from the SHA-256 digest
        of the provider name followed by the provider id.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider

        Returns:
            | int -- the id, identical for identical inputs in every process
        """
        import hashlib

        # The builtin hash() is salted per process for strings on Python 3,
        # so ids built with it could not be looked up again by another
        # process (or after a restart).
        # NOTE(review): rows created with the old hash()-based ids need
        # their ids regenerated to be found by `for_provider`.
        digest = hashlib.sha256((provider + provider_id).encode('utf-8')).digest()
        return int.from_bytes(digest[:8], 'big', signed=True)
from datetime import datetime from Crypto.Cipher import AES from argos.datastore import db, Model from flask import current_app from flask.ext.security import Security, UserMixin, RoleMixin # Table connecting users and roles roles_users = db.Table('roles_users', db.Column('user_id', db.Integer(), db.ForeignKey('user.id')), db.Column('role_id', db.Integer(), db.ForeignKey('role.id'))) # Table for users watching stories. users_stories = db.Table('users_stories', db.Column('user_id', db.Integer(), db.ForeignKey('user.id')), db.Column('story_id', db.Integer(), db.ForeignKey('story.id'))) # Table for users bookmarking events. users_events = db.Table('users_events', db.Column('user_id', db.Integer(), db.ForeignKey('user.id')), db.Column('event_id', db.Integer(), db.ForeignKey('event.id'))) # Table for users class AuthExistsForUserException(Exception): pass class Role(Model, RoleMixin): """ A user's Role
from argos.core.models.cluster import Clusterable from argos.core.brain import vectorize, entities from scipy.spatial.distance import jaccard from math import isnan from slugify import slugify # Ignore the invalid numpy warning, # which comes up when jaccard uses # empty vectors. import numpy numpy.seterr(invalid='ignore') articles_authors = db.Table( 'authors', db.Column('author_id', db.Integer, db.ForeignKey('author.id')), db.Column('article_id', db.Integer, db.ForeignKey('article.id'))) articles_entities = db.Table( 'articles_entities', db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')), db.Column('article_id', db.Integer, db.ForeignKey('article.id'))) class Article(Clusterable): """ An article. """ __tablename__ = 'article' __entities__ = {'secondary': articles_entities, 'backref_name': 'articles'} vectors = db.Column(db.PickleType)
def concept_slug(cls):
    """
    Build the primary-key string column referencing `concept.slug`.

    The foreign key cascades on both delete and update.
    """
    concept_fk = db.ForeignKey(
        'concept.slug',
        ondelete='CASCADE',
        onupdate='CASCADE')
    return db.Column(db.String, concept_fk, primary_key=True)
from argos.util import storage import galaxy as gx from slugify import slugify from datetime import datetime from os.path import splitext from sqlalchemy import event from sqlalchemy.ext.declarative import declared_attr from collections import Counter concepts_mentions = db.Table( 'concepts_mentions', db.Column( 'alias_id', db.Integer, db.ForeignKey('alias.id', ondelete='CASCADE', onupdate='CASCADE')), db.Column( 'concept_slug', db.String, db.ForeignKey('concept.slug', ondelete='CASCADE', onupdate='CASCADE'))) class BaseConceptAssociation(Model): """ Models which will be related to concepts must subclass this model and specify a backref name through a class property called `__backref__` and a foreign key property for the related model. Example:: class ArticleConceptAssociation(BaseConceptAssociation):
class Article(Clusterable):
    """
    An article, with the concepts it mentions.
    """
    __tablename__ = 'article'
    __concepts__ = {
        'association_model': ArticleConceptAssociation,
        'backref_name': 'article'
    }
    __mentions__ = {'secondary': articles_mentions, 'backref_name': 'articles'}
    title = db.Column(db.Unicode)
    text = db.Column(db.UnicodeText)
    html = db.Column(db.UnicodeText)
    ext_url = db.Column(db.Unicode)
    image = db.Column(db.String)
    ignore = db.Column(db.Boolean, default=False)
    score = db.Column(db.Float, default=0.0)
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
    feed_id = db.Column(db.Integer, db.ForeignKey('feed.id'))
    node_id = db.Column(db.Integer, unique=True, index=True)
    authors = db.relationship(
        'Author',
        secondary=articles_authors,
        backref=db.backref('articles', lazy='dynamic'))

    # There are some articles which are just noise,
    # and we want to ignore them using regexes for their titles.
    ignore_patterns = [
        # NYT country profiles
        re.compile(r'[A-Z].+\sprofile( - Overview)?')
    ]

    def __str__(self):
        return self.title

    def __repr__(self):
        return self.title

    def __init__(self, **kwargs):
        """
        Set each keyword argument as an attribute, extract concepts if
        the article has text, default the score to 0.0 and work out
        whether the article should be ignored.
        """
        for key in kwargs:
            setattr(self, key, kwargs[key])

        if self.text is not None:
            self.conceptize()

        if self.score is None:
            self.score = 0.0

        self.check_ignored()

    def check_ignored(self):
        """
        Set `ignore` to whether the title matches any of the
        `ignore_patterns`.

        Returns:
            | bool -- the new value of `ignore`
        """
        # An article without a title matches nothing; matching against
        # None directly would raise a TypeError.
        title = self.title or ''
        self.ignore = any(
            pattern.match(title) for pattern in self.ignore_patterns)
        return self.ignore

    def conceptize(self):
        """
        Process the article text for concepts,
        and add the appropriate mentions.
        """
        concepts = []
        for c_name in gx.concepts(self.text):
            # Search for the concept.
            uri = knowledge.uri_for_name(c_name)

            if uri:
                slug = uri.split('/')[-1]
            else:
                slug = slugify(c_name)
            c = Concept.query.get(slug)

            if c:
                # The concept exists: add this name as a new alias, if necessary.
                alias = Alias.query.filter_by(name=c_name, concept=c).first()
                if not alias:
                    alias = Alias(c_name)
                    c.aliases.append(alias)
                # Avoid duplicate aliases.
                if alias not in self.mentions:
                    self.mentions.append(alias)
            else:
                # The concept doesn't exist yet: create and persist it.
                c = Concept(c_name)
                self.mentions.append(c.aliases[0])
                db.session.add(c)
                db.session.commit()

            concepts.append(c)

        # Score the concepts' importance: each concept's share
        # of all the concept mentions found in the text.
        total_found = len(concepts)
        counter = Counter(concepts)
        assocs = []
        for concept, count in counter.items():
            assocs.append(ArticleConceptAssociation(concept, count / total_found))

        self.concept_associations = assocs

    @property
    def published(self):
        """
        The creation time as seconds since `epoch`.
        """
        created_at = self.created_at

        # If no timezone is set, assume UTC.
        # super annoying and it's probably not a good guess but it's
        # all we got for now.
        # In production, we will be setting article publish times as utc when
        # we fetch them, so it should be less of a problem there.
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=pytz.UTC)

        # `created_at` is bound up front: binding it only in the branch
        # above made timezone-aware timestamps fail with an unbound local.
        delta = created_at - epoch
        return delta.total_seconds()