class Grant(db.Model):
    """
    An authorization grant tying a user to a client.

    Stores the grant `code`, the redirect URI and expiry recorded with it,
    and the granted scopes as a single whitespace-separated string.
    """
    id = db.Column(db.Integer, primary_key=True)

    # Deleting the user cascades to their grants at the database level.
    user_id = db.Column(db.ForeignKey('user.id', ondelete='CASCADE'))
    user = db.relationship('User')

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    code = db.Column(db.String(255), index=True, nullable=False)

    redirect_uri = db.Column(db.String(255))
    expires = db.Column(db.DateTime)

    # Whitespace-separated scope names; read through the `scopes` property.
    _scopes = db.Column(db.Text)

    def delete(self):
        """
        Delete this grant and commit the session.

        Returns:
            | the (now deleted) grant itself
        """
        db.session.delete(self)
        # The commit must actually be *called*; a bare attribute reference
        # is a no-op and leaves the deletion pending.
        db.session.commit()
        return self

    @property
    def scopes(self):
        """
        The granted scopes as a list of strings (empty if none are stored).
        """
        if self._scopes:
            return self._scopes.split()
        return []
class Client(db.Model):
    """
    A client application, identified by `client_id` and `client_secret`.

    Redirect URIs, default scopes and allowed grant types are each stored
    as one whitespace-separated text column and exposed as lists through
    the properties below.
    """
    client_id = db.Column(db.String(40), primary_key=True)
    client_secret = db.Column(db.String(55), unique=True, index=True, nullable=False)

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    name = db.Column(db.String(40))
    desc = db.Column(db.String(400))

    is_confidential = db.Column(db.Boolean)

    # Whitespace-separated lists, split on access.
    _redirect_uris = db.Column(db.Text)
    _default_scopes = db.Column(db.Text)
    _allowed_grant_types = db.Column(db.Text)

    def validate_scopes(self, scopes):
        """
        Check every requested scope against `VALID_SCOPES`.

        Returns True when all scopes are valid; raises InvalidScope
        as soon as an unknown scope is encountered.
        """
        if any(scope not in VALID_SCOPES for scope in scopes):
            raise InvalidScope('Invalid scope.')
        return True

    def validate_grant_type(self, grant_type):
        """
        Check that `grant_type` is one of this client's allowed grant types.

        Returns True if allowed; raises InvalidGrantType otherwise.
        """
        if grant_type in self.allowed_grant_types:
            return True
        raise InvalidGrantType('Invalid or missing grant type.')

    @property
    def client_type(self):
        """ Either 'confidential' or 'public', per `is_confidential`. """
        return 'confidential' if self.is_confidential else 'public'

    @property
    def redirect_uris(self):
        """ The registered redirect URIs as a list (possibly empty). """
        raw = self._redirect_uris
        return raw.split() if raw else []

    @property
    def default_redirect_uri(self):
        """
        The first registered redirect URI.

        Raises IndexError when the client has no redirect URIs.
        """
        return self.redirect_uris[0]

    @property
    def default_scopes(self):
        """ The default scopes as a list (possibly empty). """
        raw = self._default_scopes
        return raw.split() if raw else []

    @property
    def allowed_grant_types(self):
        """ The allowed grant types as a list (possibly empty). """
        raw = self._allowed_grant_types
        return raw.split() if raw else []
class Source(Model):
    """
    An article source.

    Attributes:
        * id -> Integer (Primary Key)
        * name -> String (Unique)
        * icon -> String
        * articles -> dynamic relationship to Article (backref: `source`)
        * feeds -> relationship to Feed (backref: `source`)
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255), unique=True)
    icon = db.Column(db.String(255))

    # Keep articles on the Source so if an
    # article's feed dies, we still know where the Article came from.
    # `lazy='dynamic'` makes this attribute a query rather than a loaded list.
    articles = db.relationship('Article', backref='source', lazy='dynamic')

    feeds = db.relationship('Feed', backref='source')
class Role(Model, RoleMixin):
    """
    A user's Role

    Attributes:
        * id -> Integer (Primary Key)
        * name -> String (Unique)
        * description -> String
    """
    id = db.Column(db.Integer(), primary_key=True)
    # Role names are unique and limited to 80 characters.
    name = db.Column(db.String(80), unique=True)
    description = db.Column(db.String(255))
class Source(Model):
    """
    A feed source.

    Attributes:
        * id -> Integer (Primary Key)
        * ext_url -> Unicode
        * name -> String
        * errors -> Integer (defaults to 0)
        * articles -> dynamic relationship to Article (backref: `source`)
    """
    id = db.Column(db.Integer, primary_key=True)
    ext_url = db.Column(db.Unicode)
    name = db.Column(db.String(255))
    # NOTE(review): presumably a running count of failures for this source;
    # nothing in this class updates it -- confirm against callers.
    errors = db.Column(db.Integer, default=0)
    # `lazy='dynamic'` makes this attribute a query rather than a loaded list.
    articles = db.relationship('Article', backref='source', lazy='dynamic')
class Token(db.Model):
    """
    An access/refresh token pair issued to a client, optionally for a user.

    Scopes are stored as one whitespace-separated string and exposed
    as a list through the `scopes` property.
    """
    id = db.Column(db.Integer, primary_key=True)

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    # Currently OAuthLib only supports bearer tokens.
    token_type = db.Column(db.String(40))

    access_token = db.Column(db.String(255), unique=True)
    refresh_token = db.Column(db.String(255), unique=True)
    expires = db.Column(db.DateTime)

    _scopes = db.Column(db.Text)

    @property
    def scopes(self):
        """ The token's scopes as a list of strings (empty if none). """
        raw = self._scopes
        return raw.split() if raw else []
class Entity(Model):
    """
    An entity, which could be a place, person,
    organization, concept, topic, etc.

    The slug is the primary key and is derived from the name
    at construction time.
    """
    name = db.Column(db.UnicodeText)
    slug = db.Column(db.String(255), primary_key=True)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow)

    def __init__(self, name):
        """
        Create an entity from its name.

        Args:
            | name (str) -- the entity's name; also the source of its slug
        """
        self.slug = slugify(name)
        self.name = name
class Auth(Model):
    """
    Represents a third-party authentication.

    The primary key is derived deterministically from the provider name
    and the provider-assigned id (see `gen_id`), so an Auth can be looked
    up without a query on those two columns.

    The access token secret is stored encrypted (AES, CFB mode) using the
    `AES_KEY` and `AES_IV` values from the current app's config.
    """
    id = db.Column(db.BigInteger(), primary_key=True)
    provider = db.Column(db.String(255))
    provider_id = db.Column(db.String(255))
    access_token = db.Column(db.String(255))
    _access_token_secret = db.Column('access_token_secret', db.LargeBinary(255))
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    def __init__(self, provider, provider_id, access_token, access_token_secret=None):
        """
        Args:
            | provider (str)            -- the provider name, e.g. 'twitter'
            | provider_id (str)         -- the user id assigned by the provider
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret (optional)
        """
        self.provider_id = provider_id
        self.provider = provider
        self.access_token = access_token
        self.access_token_secret = access_token_secret

        # Generate a unique id for this auth based on the provider and the provider id.
        self.id = Auth.gen_id(provider, provider_id)

    def update_token(self, access_token, access_token_secret=None):
        """
        Updates token for an authentication.

        Enforcing that access tokens and their
        secrets must be updated in tandem.
        May need to revisit this decision later.

        Args:
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret

        Raises:
            | Exception -- if this auth already has a secret (or has no
            |              token yet) and no new secret was given
        """
        # If the auth has a token and no secret, just update the token.
        # (Any secret passed in this case is ignored.)
        if self.access_token and self.access_token_secret is None:
            self.access_token = access_token

        # Otherwise, the auth has a token and a secret,
        # and a new secret must be present.
        elif access_token_secret is None:
            raise Exception('This authentication requires a token secret, which was not specified.')

        else:
            self.access_token = access_token
            self.access_token_secret = access_token_secret

    @property
    def access_token_secret(self):
        """
        The decrypted access token secret, or None if no secret is stored.
        """
        if self._access_token_secret is None:
            return None
        # NOTE(review): the same configured IV is used for every secret;
        # consider a per-record IV.
        dec = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
        return dec.decrypt(self._access_token_secret).decode('utf-8')

    @access_token_secret.setter
    def access_token_secret(self, value):
        """
        Encrypt and store the secret. Setting None leaves any stored
        secret untouched.
        """
        if value is None:
            return
        # The getter decodes UTF-8 bytes, so text must be encoded the same
        # way before encryption (the cipher operates on bytes).
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        enc = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
        self._access_token_secret = enc.encrypt(value)

    @staticmethod
    def for_provider(provider, provider_id):
        """
        Find an Auth instance by provider.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider

        Returns:
            | the matching Auth, or None if there is none
        """
        return Auth.query.get(Auth.gen_id(provider, provider_id))

    @staticmethod
    def gen_id(provider, provider_id):
        """
        Generates a unique id for an Auth.

        The id is a signed 64-bit integer taken from a SHA-256 digest of
        the provider and provider id, so it is identical in every process.
        The builtin `hash()` is not suitable here: string hashes are salted
        per interpreter process, so ids generated in one process could not
        be reproduced in another.

        NOTE(review): rows keyed with the previous `hash()`-based ids will
        need to be re-keyed.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider
        """
        import hashlib
        import struct

        # The separator keeps ('ab', 'c') and ('a', 'bc') distinct.
        key = '{0}:{1}'.format(provider, provider_id).encode('utf-8')
        digest = hashlib.sha256(key).digest()
        # '>q' is a big-endian signed 64-bit integer.
        return struct.unpack('>q', digest[:8])[0]
class User(Model, UserMixin):
    """
    A user

    Attributes:
        * id -> Integer (Primary Key)
        * email -> String (Unique)
        * image -> String (Unique)
        * name -> String (Unique)
        * password -> String
        * active -> Bool
        * confirmed_at -> DateTime
        * auths -> [Auth] (dynamic)
        * roles -> [Role]
        * watching -> [Story]
        * bookmarked -> [Event]
        * created_at, updated_at -> DateTime
    """
    id = db.Column(db.Integer(), primary_key=True)
    email = db.Column(db.String(255), unique=True)
    image = db.Column(db.String(255), unique=True)
    name = db.Column(db.String(255), unique=True)
    password = db.Column(db.String(255))
    active = db.Column(db.Boolean())
    confirmed_at = db.Column(db.DateTime())
    auths = db.relationship('Auth', backref='user', lazy='dynamic')
    roles = db.relationship('Role', secondary=roles_users,
                            backref=db.backref('users', lazy='dynamic'))
    watching = db.relationship('Story', secondary=users_stories,
                               backref=db.backref('watchers', lazy='joined'))
    bookmarked = db.relationship('Event', secondary=users_events,
                                 backref=db.backref('bookmarkers', lazy='joined'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow)

    def __init__(self, auth=None, **kwargs):
        """
        Create a user, setting each keyword argument as an attribute.
        The `auth` argument is accepted but not used.
        """
        for attr, value in kwargs.items():
            setattr(self, attr, value)

    def add_provider(self, provider, provider_id, access_token, access_token_secret=None, update=True):
        """
        Attach a provider authentication to this user and commit.

        If the authentication already exists for this user it is updated
        (when `update` is true); if it exists for a different user, an
        AuthExistsForUserException is raised. Otherwise a new Auth is
        created for this user.

        Args:
            | provider (str)            -- the provider name, e.g. 'twitter'
            | provider_id (str)         -- the id assigned by the provider
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret
            | update (bool)             -- whether or not to update the existing
            |                              provider authentication, if found (default: True)

        Returns:
            | the new or existing Auth
        """
        auth = Auth.for_provider(provider, provider_id)

        if not auth:
            # Nothing registered yet for this provider identity: create it.
            auth = Auth(provider, provider_id, access_token, access_token_secret)
            auth.user = self
            db.session.add(auth)
        elif auth.user is not self:
            raise AuthExistsForUserException('Found an existing authorization for {0} associated with another user.'.format(provider))
        elif update:
            auth.update_token(access_token, access_token_secret)

        db.session.commit()
        return auth

    def merge(self, user):
        """
        Fold another user into this one, then delete the other user
        and commit. *This* user is the canonical one: when both users
        have an authentication for the same provider, this user's is kept
        and the other's is not transferred.

        UI tip: prompt the user to pick which account is their primary one!
        """
        own_providers = {auth.provider for auth in self.auths}

        # Conflicts are not expected, but are possible, e.g. if a user has
        # two twitter accts and authenticated each on a different user here.
        for other_auth in user.auths:
            if other_auth.provider in own_providers:
                continue
            other_auth.user = self

        db.session.delete(user)
        db.session.commit()

    @staticmethod
    def for_provider(provider, provider_id):
        """
        Find an User instance by provider.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider

        Returns:
            | the user owning the matching Auth, or None if no Auth matches
        """
        auth = Auth.for_provider(provider, provider_id)
        return auth.user if auth else None
class Concept(Model):
    """
    An concept, which could be a place, person,
    organization, topic, etc.

    You should *not* set the `slug` or `uri`;
    they are set automatically according to the `name`.
    In the spirit of Python's developer maturity,
    you're trusted not to modify them.
    """
    name = db.Column(db.UnicodeText)
    slug = db.Column(db.String(255), primary_key=True)
    uri = db.Column(db.String)
    summary = db.Column(db.UnicodeText)
    image = db.Column(db.String)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    aliases = db.relationship('Alias', backref='concept', lazy='joined')
    commonness = db.Column(db.Float, default=0.0)

    # Mapping concepts to concepts,
    # and tracking mentions of other concepts in this concept's summary.
    mentions = db.relationship('Alias', secondary=concepts_mentions,
                               backref=db.backref('concepts'))
    concept_associations = db.relationship(
        ConceptConceptAssociation,
        foreign_keys=[ConceptConceptAssociation.from_concept_slug],
        backref=db.backref('from_concept'),
        cascade='all, delete-orphan')

    # Class-level default shared by all instances; it must never be
    # mutated in place (see `profile`).
    _sources = ['Wikipedia', 'DBpedia']

    def __init__(self, name):
        """
        Initialize a concept by a name, which can be
        an alias (it does not have to be the canonical name).
        This specified name will be saved as an Alias.

        A canonical name will be looked for; if one is found
        it will be used as the slug for this Concept.

        Args:
            | name (str) -- a name (canonical or alias) for the concept
        """
        self.aliases.append(Alias(name))

        # Try to get a canonical URI
        # and derive the slug from that.
        self.uri = knowledge.uri_for_name(name)
        if self.uri:
            self.slug = self.uri.split('/')[-1]
            k = knowledge.knowledge_for(uri=self.uri, fallback=True)
            self.commonness = knowledge.commonness_for_uri(self.uri)

            self.summary = k['summary']
            self.name = k['name']

            # Download the image.
            if k['image'] is not None:
                ext = splitext(k['image'])[-1].lower()
                # NOTE(review): `hash()` of a str differs between interpreter
                # processes, so this filename is not reproducible.
                self.image = storage.save_from_url(
                    k['image'], '{0}{1}'.format(hash(self.slug), ext))

        # If no URI was found,
        # generate our own slug.
        # Note: A problem here is that it assumes that
        # this particular name is the canonical one,
        # and that we don't collect any information for it.
        else:
            self.slug = slugify(name)
            # Commonness is set to default of 0.0,
            # which makes sense because if there's no URI for it
            # it probably is not common at all.

    @property
    def names(self):
        """ The names of all of this concept's aliases. """
        return [alias.name for alias in self.aliases]

    @property
    def sources(self):
        """
        Returns the data sources used for this concept.
        """
        return self._sources

    @property
    def concepts(self):
        """
        Returns the concepts this concept points *to*,
        with their importance scores for this concept.

        If there is a summary but no associations yet,
        the summary is processed first (see `conceptize`).
        """
        if self.summary and not self.concept_associations:
            self.conceptize()

        def with_score(assoc):
            assoc.concept.score = assoc.score
            return assoc.concept
        return list(map(with_score, self.concept_associations))

    @property
    def from_concepts(self):
        """
        Returns the concepts that points to this concept,
        with their importance scores for this concept.
        """
        def with_score(assoc):
            assoc.from_concept.score = assoc.score
            return assoc.from_concept
        return list(map(with_score, self.from_concept_associations))

    @property
    def stories(self):
        """
        Return the stories associated with this concept,
        adding an additional "relatedness" value
        which is the concept's importance score for
        a particular story.
        """
        def with_score(assoc):
            assoc.story.relatedness = assoc.score
            return assoc.story
        return list(map(with_score, self.story_associations))

    @property
    def events(self):
        """
        Same as the `stories` property
        but for events.
        """
        def with_score(assoc):
            assoc.event.relatedness = assoc.score
            return assoc.event
        return list(map(with_score, self.event_associations))

    @property
    def articles(self):
        """
        Same as the `stories` property
        but for articles.
        """
        def with_score(assoc):
            assoc.article.relatedness = assoc.score
            return assoc.article
        return list(map(with_score, self.article_associations))

    @property
    def related_concepts(self):
        """
        The concepts this concept points to plus those pointing to it.
        """
        # NOTE(review): `to_concepts` is not defined in this class as shown;
        # the `concepts` property looks like the intended counterpart of
        # `from_concepts` -- confirm.
        return self.to_concepts + self.from_concepts

    @property
    def profile(self):
        """
        Returns a data profile specifically
        for this concept's type.

        The profile is fetched once per instance and cached; any sources
        it lists are added to this concept's sources.
        """
        if not hasattr(self, '_profile') or not self._profile:
            self._profile = knowledge.profiles.get_profile(self.uri)
            # Build a new per-instance list: an in-place `+=` would extend
            # the class-level `_sources` list shared by every Concept.
            self._sources = list(self._sources) + list(self._profile.get('sources', []))
        return self._profile

    def conceptize(self):
        """
        Process the concept summary for concepts,
        and add the appropriate mentions.

        Concepts that do not exist yet are created and committed.
        Afterwards `concept_associations` is replaced with one
        association per distinct concept found.
        """
        concepts = []
        for c_name in gx.concepts(self.summary):
            # Search for the concept.
            uri = knowledge.uri_for_name(c_name)

            if uri:
                slug = uri.split('/')[-1]
            else:
                slug = slugify(c_name)
            c = Concept.query.get(slug)

            # If an concept is found...
            if c:
                # Add this name as a new alias, if necessary.
                alias = Alias.query.filter_by(name=c_name, concept=c).first()
                if not alias:
                    alias = Alias(c_name)
                    c.aliases.append(alias)
                self.mentions.append(alias)

            # If one doesn't exist, create a new one.
            else:
                c = Concept(c_name)
                self.mentions.append(c.aliases[0])
                db.session.add(c)
                db.session.commit()

            concepts.append(c)

        # Score the concepts' importance:
        # (occurrences - commonness) / total concepts found.
        total_found = len(concepts)
        self.concept_associations = [
            ConceptConceptAssociation(concept, (count - concept.commonness) / total_found)
            for concept, count in Counter(concepts).items()
        ]
class Cluster(Clusterable):
    """
    A cluster.

    A Cluster is capable of clustering Clusterables.

    Note: A Cluster itself is a Clusterable; i.e. clusters
    can cluster clusters :)
    """
    __abstract__ = True
    title = db.Column(db.Unicode, default='')
    summary = db.Column(db.UnicodeText)
    image = db.Column(db.String())

    def __str__(self):
        # The column default is only applied on insert, so the title can
        # still be None here; `__str__` must return a string.
        return self.title or ''

    def __repr__(self):
        return self.title or ''

    @declared_attr
    def members(cls):
        """
        Build the members attribute from the
        subclass's `__members__` class attribute.

        Example::

            __members__ = {'class_name': 'Article', 'secondary': events_articles, 'backref_name': 'events'}
        """
        args = cls.__members__

        return db.relationship(args['class_name'],
                               secondary=args['secondary'],
                               backref=db.backref(args['backref_name']),
                               lazy='dynamic')

    @staticmethod
    def cluster(cls, clusterables):
        """
        The particular clustering method for this Cluster class.
        Must be implemented on subclasses, otherwise raises NotImplementedError.

        NOTE(review): declared as a staticmethod, so `cls` must be passed
        explicitly by the caller.
        """
        raise NotImplementedError

    def __init__(self, members):
        """
        Initialize a cluster with some members.
        """
        self.members = members
        self.update()

    def summarize(self):
        """
        Generate a summary for this cluster.

        Returns:
            | the new summary (str)
        """
        # `members` is a dynamic relationship (a query), which has no
        # `len()`; materialize it once and work on the list.
        members = list(self.members)
        if len(members) == 1:
            member = members[0]
            self.summary = ' '.join(summarize(member.title, member.text))
        else:
            self.summary = ' '.join(
                multisummarize([m.text for m in members]))
        return self.summary

    def conceptize(self):
        """
        Update concepts (and mentions) for this cluster and score them.

        Each concept's score is its share of the members' summed scores,
        divided by (commonness + 1).
        """
        members = list(self.members)

        self.mentions = list(
            set(chain.from_iterable(member.mentions for member in members)))

        # Get all concept associations for this cluster's members.
        assocs = chain.from_iterable(
            member.concept_associations for member in members)

        # Group associations by their concept.
        # Since `groupby` only looks at adjacent elements,
        # we have to first sort the associations by their concepts' slugs.
        key_func = lambda assoc: assoc.concept.slug
        grouped_assocs = [
            list(g)
            for k, g in groupby(sorted(assocs, key=key_func), key_func)
        ]

        # Calculate the raw scores of each concept.
        raw_scores = {}
        for assoc_group in grouped_assocs:
            # Each group points to the same concept, so just grab the first.
            concept = assoc_group[0].concept
            raw_scores[concept] = sum(assoc.score for assoc in assoc_group)
        total = sum(raw_scores.values())

        # Calculate the final scores and create the associations.
        association_model = self.__class__.__concepts__['association_model']
        assocs = []
        for concept, raw_score in raw_scores.items():
            # A zero total gives no meaningful share; score 0 rather than
            # dividing by zero.
            share = raw_score / total if total else 0.0
            score = share / (concept.commonness + 1)  # +1 to avoid division by zero
            assocs.append(association_model(concept, score))
        self.concept_associations = assocs

    def update(self):
        """
        Update the cluster's attributes: refresh `updated_at`,
        the summary, and the concepts. Nothing is saved here.
        """
        now = datetime.utcnow()
        self.updated_at = now
        # Only stamp the creation time once; later updates must not reset it.
        if getattr(self, 'created_at', None) is None:
            self.created_at = now

        self.summarize()
        self.conceptize()

    def add(self, member):
        """
        Add an member to the cluster.
        """
        self.members.append(member)

    def timespan(self, start, end=None):
        """
        Get cluster members within a certain (date)timespan.
        Both bounds are exclusive.

        Args:
            | start (datetime)
            | end (datetime)    -- default is now (UTC)
        """
        if end is None:
            end = datetime.utcnow()
        return [
            member for member in self.members
            if start < member.created_at < end
        ]
class Cluster(Clusterable):
    """
    A cluster.

    A Cluster is capable of clustering Clusterables.

    Note: A Cluster itself is a Clusterable; i.e. clusters
    can cluster clusters :)
    """
    __abstract__ = True
    title = db.Column(db.Unicode)
    summary = db.Column(db.UnicodeText)
    image = db.Column(db.String())

    @declared_attr
    def members(cls):
        """
        Build the members attribute from the
        subclass's `__members__` class attribute.

        Example::

            __members__ = {'class_name': 'Article', 'secondary': events_articles, 'backref_name': 'events'}
        """
        args = cls.__members__

        return db.relationship(args['class_name'],
                               secondary=args['secondary'],
                               backref=db.backref(args['backref_name']))

    @staticmethod
    def cluster(cls, clusterables):
        """
        The particular clustering method for this Cluster class.
        Must be implemented on subclasses, otherwise raises NotImplementedError.

        NOTE(review): declared as a staticmethod, so `cls` must be passed
        explicitly by the caller.
        """
        raise NotImplementedError

    def __init__(self, members):
        """
        Initialize a cluster with some members.
        """
        self.members = members
        self.update()

    def summarize(self):
        """
        Generate a summary for this cluster.

        Returns:
            | the new summary (str)
        """
        if len(self.members) == 1:
            member = self.members[0]
            self.summary = ' '.join(summarize(member.title, member.text))
        else:
            self.summary = ' '.join(
                multisummarize([m.text for m in self.members]))
        return self.summary

    def titleize(self):
        """
        Generate a title for this cluster.
        Also selects a representative image.

        Looks for the cluster member that is most similar to the others,
        and then uses the title of that member. The image comes from the
        most similar member that has one. If no member qualifies (e.g. the
        cluster is empty), the title and image are left unchanged.
        """
        best, best_sim = None, 0
        best_w_image, best_w_image_sim = None, 0
        for member in self.members:
            avg_sim = self.similarity(member)
            # `>=` means later members win ties.
            if avg_sim >= best_sim:
                best, best_sim = member, avg_sim
            if avg_sim >= best_w_image_sim and member.image is not None:
                best_w_image, best_w_image_sim = member, avg_sim

        if best is not None:
            self.title = best.title

        if best_w_image is not None:
            self.image = best_w_image.image

    def entitize(self):
        """
        Update entities for this cluster:
        the distinct entities of all of its members.
        """
        self.entities = list(
            set(chain.from_iterable(
                member.entities for member in self.members)))

    def update(self):
        """
        Update the cluster's attributes: title, image, summary, entities
        and `updated_at`. Nothing is saved here.
        """
        self.titleize()
        self.summarize()
        self.entitize()

        now = datetime.utcnow()
        self.updated_at = now
        # Only stamp the creation time once; later updates must not reset it.
        if getattr(self, 'created_at', None) is None:
            self.created_at = now

    def add(self, member):
        """
        Add an member to the cluster.
        """
        self.members.append(member)

    def similarity(self, obj):
        """
        Calculate the similarity of an object with this cluster,
        or the similarity between another cluster and this cluster.
        If it is an object, that object must have a `similarity` method implemented.

        Returns:
            | the average similarity of `obj` to this cluster's members,
            | or 0.0 if the cluster has no members
        """
        sims = [obj.similarity(member) for member in self.members]

        # An empty cluster has nothing to compare against.
        if not sims:
            return 0.0

        # Calculate average similarity.
        return sum(sims) / len(sims)

    def timespan(self, start, end=None):
        """
        Get cluster members within a certain (date)timespan.
        Both bounds are exclusive.

        Args:
            | start (datetime)
            | end (datetime)    -- default is now (UTC)
        """
        if end is None:
            end = datetime.utcnow()
        return [
            member for member in self.members
            if start < member.created_at < end
        ]
class Article(Clusterable):
    """
    An article.

    On construction (when text is given) the article's entities are
    extracted and its vectors are computed.
    """
    __tablename__ = 'article'
    __entities__ = {'secondary': articles_entities, 'backref_name': 'articles'}
    vectors = db.Column(db.PickleType)
    title = db.Column(db.Unicode)
    text = db.Column(db.UnicodeText)
    html = db.Column(db.UnicodeText)
    ext_url = db.Column(db.Unicode)
    image = db.Column(db.String())
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
    authors = db.relationship('Author',
                              secondary=articles_authors,
                              backref=db.backref('articles', lazy='dynamic'))

    def __init__(self, **kwargs):
        """
        Set each keyword argument as an attribute, then process
        the text (if any) for entities and vectors.
        """
        for attr, value in kwargs.items():
            setattr(self, attr, value)

        if self.text is not None:
            self.entitize()
            self.vectorize()

    def vectorize(self):
        """
        Returns the vectors representing this article,
        computing and caching them on first use.

        Articles are represented by:
            [bag of words vector, entities vector]
        """
        if self.vectors is None:
            entity_text = ' '.join(entities(self.text))
            self.vectors = [vectorize(self.text), vectorize(entity_text)]
        return self.vectors

    def entitize(self):
        """
        Process the article text for entities.

        Entities not yet in the database are created and committed
        one at a time.
        """
        found = []
        for entity_name in entities(self.text):
            # TO DO: Need to find a way of getting canonical name.

            # Look the entity up by slug, creating it if it is missing.
            entity = Entity.query.get(slugify(entity_name))
            if not entity:
                entity = Entity(entity_name)
                db.session.add(entity)
                db.session.commit()
            found.append(entity)
        self.entities = found

    def similarity(self, article):
        """
        Calculate the similarity between this article
        and another article.

        The result is a weighted combination of text-vector similarity,
        entity-vector similarity and closeness of creation times,
        normalized by the sum of the weights.
        """
        own_vecs = self.vectorize()
        other_vecs = article.vectorize()

        # Weights: [text vector, entity vector, publication date]
        coefs = [2, 1, 2]
        sim = 0

        for idx, own_vec in enumerate(own_vecs):
            dist = jaccard(other_vecs[idx], own_vec)

            # Two empty vectors give a jaccard distance of NaN (and a
            # warning). Treat that as distance 1, i.e. nothing in common.
            if isnan(dist):
                dist = 1
            sim += coefs[idx] * (1 - dist)

        # Also take publication dates into account.
        ideal_time = 259200  # 3 days, in seconds
        elapsed = abs(self.created_at - article.created_at).total_seconds()

        # 1 within the ideal time; approaches 0 as the gap grows beyond it.
        if elapsed < ideal_time:
            time_score = 1
        else:
            time_score = ideal_time / elapsed
        sim += coefs[2] * time_score

        # Normalize back to [0, 1].
        return sim / sum(coefs)