예제 #1
0
class Grant(db.Model):
    """
    A grant record linking a user and a client to a code,
    with an optional redirect URI, expiry time and scopes.
    """
    id = db.Column(db.Integer, primary_key=True)

    user_id = db.Column(db.ForeignKey('user.id', ondelete='CASCADE'))
    user = db.relationship('User')

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    code = db.Column(db.String(255), index=True, nullable=False)
    redirect_uri = db.Column(db.String(255))
    expires = db.Column(db.DateTime)

    # Whitespace-separated scope names; read through the `scopes` property.
    _scopes = db.Column(db.Text)

    def delete(self):
        """
        Delete this grant from the database and commit the session.

        Returns:
            | Grant -- this grant instance
        """
        db.session.delete(self)
        # `commit` has to be *called*; a bare attribute reference is a
        # no-op and leaves the deletion pending in the session.
        db.session.commit()
        return self

    @property
    def scopes(self):
        """
        The grant's scopes as a list of strings
        (an empty list if none are stored).
        """
        if self._scopes:
            return self._scopes.split()
        return []
예제 #2
0
class ConceptConceptAssociation(BaseConceptAssociation):
    """
    Association between two concepts.

    Both columns reference `concept.slug` and together form the
    primary key; rows cascade on delete/update of either concept.
    """
    from_concept_slug = db.Column(db.String,
                                  db.ForeignKey('concept.slug',
                                                ondelete='CASCADE',
                                                onupdate='CASCADE'),
                                  primary_key=True)
    concept_slug = db.Column(db.String,
                             db.ForeignKey('concept.slug',
                                           ondelete='CASCADE',
                                           onupdate='CASCADE'),
                             primary_key=True)
    # Two columns point at `concept.slug`, so the relationship names the
    # one it joins on (`concept_slug`). The backref exposes these rows on
    # Concept as `from_concept_associations`.
    concept = db.relationship('Concept',
                              backref=db.backref('from_concept_associations'),
                              foreign_keys=[concept_slug])
예제 #3
0
class EventConceptAssociation(BaseConceptAssociation):
    """
    Association model whose foreign key points at an event.
    """
    # Consumed by BaseConceptAssociation (not visible here);
    # presumably the backref name created on the concept side -- confirm.
    __backref__ = 'event_associations'
    # Part of the primary key; cascades on delete/update of the event row.
    event_id = db.Column(db.Integer,
                         db.ForeignKey('event.id',
                                       ondelete='CASCADE',
                                       onupdate='CASCADE'),
                         primary_key=True)
예제 #4
0
class StoryConceptAssociation(BaseConceptAssociation):
    """
    Association model whose foreign key points at a story.
    """
    # Consumed by BaseConceptAssociation (not visible here);
    # presumably the backref name created on the concept side -- confirm.
    __backref__ = 'story_associations'
    # Part of the primary key; cascades on delete/update of the story row.
    story_id = db.Column(db.Integer,
                         db.ForeignKey('story.id',
                                       ondelete='CASCADE',
                                       onupdate='CASCADE'),
                         primary_key=True)
예제 #5
0
class ArticleConceptAssociation(BaseConceptAssociation):
    """
    Association model whose foreign key points at an article.
    """
    # Consumed by BaseConceptAssociation (not visible here);
    # presumably the backref name created on the concept side -- confirm.
    __backref__ = 'article_associations'
    # Part of the primary key; cascades on delete/update of the article row.
    article_id = db.Column(db.Integer,
                           db.ForeignKey('article.id',
                                         ondelete='CASCADE',
                                         onupdate='CASCADE'),
                           primary_key=True)
예제 #6
0
class Client(db.Model):
    """
    A client application, identified by `client_id` and
    `client_secret`, optionally owned by a user.
    """
    client_id = db.Column(db.String(40), primary_key=True)
    client_secret = db.Column(
        db.String(55), unique=True, index=True, nullable=False)

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    name = db.Column(db.String(40))
    desc = db.Column(db.String(400))

    is_confidential = db.Column(db.Boolean)

    # The three text columns below hold whitespace-separated lists
    # and are exposed as Python lists by the matching properties.
    _redirect_uris = db.Column(db.Text)
    _default_scopes = db.Column(db.Text)

    _allowed_grant_types = db.Column(db.Text)

    def validate_scopes(self, scopes):
        """
        Check every requested scope against `VALID_SCOPES`.

        Returns True when all are valid; raises `InvalidScope`
        as soon as an unknown scope is encountered.
        """
        if any(scope not in VALID_SCOPES for scope in scopes):
            raise InvalidScope('Invalid scope.')
        return True

    def validate_grant_type(self, grant_type):
        """
        Check that `grant_type` is one this client is allowed to use.

        Returns True when allowed; raises `InvalidGrantType` otherwise.
        """
        if grant_type in self.allowed_grant_types:
            return True
        raise InvalidGrantType('Invalid or missing grant type.')

    @property
    def client_type(self):
        """
        'confidential' or 'public', depending on `is_confidential`.
        """
        return 'confidential' if self.is_confidential else 'public'

    @property
    def redirect_uris(self):
        """
        The stored redirect URIs as a list (empty if none are stored).
        """
        return self._redirect_uris.split() if self._redirect_uris else []

    @property
    def default_redirect_uri(self):
        """
        The first entry of `redirect_uris`.
        """
        uris = self.redirect_uris
        return uris[0]

    @property
    def default_scopes(self):
        """
        The stored default scopes as a list (empty if none are stored).
        """
        return self._default_scopes.split() if self._default_scopes else []

    @property
    def allowed_grant_types(self):
        """
        The stored grant types as a list (empty if none are stored).
        """
        return (self._allowed_grant_types.split()
                if self._allowed_grant_types else [])
예제 #7
0
class Alias(Model):
    """
    A name by which a concept is known.

    The `slug` column references `concept.slug` and
    cascades on delete/update of that concept.
    """
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.UnicodeText)
    slug = db.Column(db.String,
                     db.ForeignKey('concept.slug',
                                   ondelete='CASCADE',
                                   onupdate='CASCADE'))

    def __init__(self, name):
        """
        Create an alias.

        Args:
            | name -- the alias text, stored in `name`
        """
        self.name = name
예제 #8
0
class Token(db.Model):
    """
    An access/refresh token pair issued to a client,
    optionally on behalf of a user.
    """
    id = db.Column(db.Integer, primary_key=True)

    client_id = db.Column(db.ForeignKey('client.client_id'), nullable=False)
    client = db.relationship('Client')

    user_id = db.Column(db.ForeignKey('user.id'))
    user = db.relationship('User')

    # OAuthLib currently supports bearer tokens only.
    token_type = db.Column(db.String(40))

    access_token = db.Column(db.String(255), unique=True)
    refresh_token = db.Column(db.String(255), unique=True)
    expires = db.Column(db.DateTime)
    # Whitespace-separated scope names; read through `scopes`.
    _scopes = db.Column(db.Text)

    @property
    def scopes(self):
        """
        The token's scopes as a list (empty if none are stored).
        """
        return self._scopes.split() if self._scopes else []
예제 #9
0
class Feed(Model):
    """
    A particular feed for a source,
    from which articles can be collected.
    """
    id = db.Column(db.Integer, primary_key=True)
    # Declared unique, so a given URL can be stored for only one feed.
    ext_url = db.Column(db.Unicode, unique=True)
    # Integer counter starting at 0.
    # presumably the number of failed collection attempts -- confirm.
    errors = db.Column(db.Integer, default=0)
    # Both timestamps default to the naive UTC time at insert.
    # NOTE(review): `updated_at` declares no `onupdate`, so nothing in
    # this model refreshes it on later writes -- confirm callers set it.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow)
    # Boolean flag, False by default.
    # presumably marks a collection in progress -- confirm.
    updating = db.Column(db.Boolean, default=False)
    # `lazy='dynamic'` makes `feed.articles` a query object rather than a
    # loaded list; each Article gets a `feed` backref.
    articles = db.relationship('Article', backref='feed', lazy='dynamic')
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
예제 #10
0
파일: story.py 프로젝트: keho98/argos
from argos.datastore import db
from argos.core.models import Entity
from argos.core.models.cluster import Cluster
from argos.core.brain.cluster import cluster
from argos.core.brain.summarize import multisummarize

from argos.util.logger import logger

# Association table between stories and events.
# The (story_id, event_id) pair is the composite primary key,
# so each pairing can be stored at most once.
stories_events = db.Table('stories_events',
        db.Column('story_id', db.Integer, db.ForeignKey('story.id'), primary_key=True),
        db.Column('event_id', db.Integer, db.ForeignKey('event.id'), primary_key=True)
)

# Association table between stories and entities.
# No primary key or uniqueness constraint is declared here, so the
# table as declared does not forbid duplicate pairs.
stories_entities = db.Table('stories_entities',
        db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')),
        db.Column('story_id', db.Integer, db.ForeignKey('story.id'))
)

class Story(Cluster):
    __tablename__   = 'story'
    __members__     = {'class_name': 'Event', 'secondary': stories_events, 'backref_name': 'stories'}
    __entities__    = {'secondary': stories_entities, 'backref_name': 'stories'}

    @property
    def events(self):
        """
        Convenience :)
        """
        return self.members

    @events.setter
예제 #11
0
from argos.datastore import db
from argos.core.models.cluster import Cluster
from argos.core.brain.cluster import cluster
from argos.core.brain.summarize import summarize, multisummarize

from argos.util.logger import logger

from datetime import datetime

# Association table between events and articles.
# The (event_id, article_id) pair is the composite primary key,
# so each pairing can be stored at most once.
events_articles = db.Table(
    'events_articles',
    db.Column('event_id',
              db.Integer,
              db.ForeignKey('event.id'),
              primary_key=True),
    db.Column('article_id',
              db.Integer,
              db.ForeignKey('article.id'),
              primary_key=True))

# Association table between events and entities.
# No primary key or uniqueness constraint is declared here, so the
# table as declared does not forbid duplicate pairs.
events_entities = db.Table(
    'events_entities',
    db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')),
    db.Column('event_id', db.Integer, db.ForeignKey('event.id')))


class Event(Cluster):
    __tablename__ = 'event'
    __members__ = {
        'class_name': 'Article',
        'secondary': events_articles,
예제 #12
0
class Article(Clusterable):
    """
    An article, with its text, extracted entities
    and the vectors used to compare it to other articles.
    """
    __tablename__ = 'article'
    __entities__ = {'secondary': articles_entities, 'backref_name': 'articles'}
    vectors = db.Column(db.PickleType)
    title = db.Column(db.Unicode)
    text = db.Column(db.UnicodeText)
    html = db.Column(db.UnicodeText)
    ext_url = db.Column(db.Unicode)
    image = db.Column(db.String())
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
    authors = db.relationship(
        'Author',
        secondary=articles_authors,
        backref=db.backref('articles', lazy='dynamic'))

    def __init__(self, **kwargs):
        """
        Set every keyword argument as an attribute and, when text
        was provided, extract its entities and build its vectors.
        """
        for attr, value in kwargs.items():
            setattr(self, attr, value)

        if self.text is None:
            return

        self.entitize()
        self.vectorize()

    def vectorize(self):
        """
        Return the vectors representing this article,
        computing and caching them on first use.

        The representation is:
            [bag of words vector, entities vector]
        """
        if self.vectors is None:
            # The calls below resolve to the module-level `vectorize`
            # helper, not to this method.
            text_vector = vectorize(self.text)
            entity_vector = vectorize(' '.join(entities(self.text)))
            self.vectors = [text_vector, entity_vector]
        return self.vectors

    def entitize(self):
        """
        Extract entities from the article text and attach them,
        creating (and committing) any entity that does not exist yet.
        """
        found = []
        for name in entities(self.text):
            # TO DO: Need to find a way of getting canonical name.

            # Entities are looked up by the slug of their name.
            entity = Entity.query.get(slugify(name))

            # Unknown entity: create and persist it right away.
            if not entity:
                entity = Entity(name)
                db.session.add(entity)
                db.session.commit()
            found.append(entity)
        self.entities = found

    def similarity(self, article):
        """
        Score how similar this article is to another one.

        The score is a weighted combination of text-vector
        similarity, entity-vector similarity and closeness
        of creation dates, divided by the sum of the weights.
        """
        own_vectors = self.vectorize()
        other_vectors = article.vectorize()

        # Weights for: [text vector, entity vector, publication date]
        weights = [2, 1, 2]
        total = 0
        for idx, own_vec in enumerate(own_vectors):
            distance = jaccard(other_vectors[idx], own_vec)

            # jaccard yields NaN for two empty vectors (and warns).
            # Treat that as maximal distance: nothing in common.
            if isnan(distance):
                distance = 1
            total += (weights[idx] * (1 - distance))

        # Factor in how far apart the two articles were created.
        window = 259200  # 3 days, in seconds
        own_time, other_time = self.created_at, article.created_at

        # Always subtract the earlier time from the later one.
        if own_time > other_time:
            gap = own_time - other_time
        else:
            gap = other_time - own_time
        elapsed = gap.total_seconds()

        # 1 inside the window, decaying towards 0 as the gap grows.
        if elapsed < window:
            time_score = 1
        else:
            time_score = window / elapsed
        total += (weights[2] * time_score)

        # Divide by the total weight to normalize.
        return total / sum(weights)
예제 #13
0
파일: user.py 프로젝트: publicscience/argos
class Auth(Model):
    """
    Represents a third-party authentication.
    """
    id                      = db.Column(db.BigInteger(), primary_key=True)
    provider                = db.Column(db.String(255))
    provider_id             = db.Column(db.String(255))
    access_token            = db.Column(db.String(255))
    # Stored encrypted; read and written through `access_token_secret`.
    _access_token_secret    = db.Column('access_token_secret', db.LargeBinary(255))
    user_id                 = db.Column(db.Integer, db.ForeignKey('user.id'))

    def __init__(self, provider, provider_id, access_token, access_token_secret=None):
        """
        Create an authentication for a provider account.

        Args:
            | provider (str)            -- the provider name
            | provider_id (str)         -- the user id assigned by the provider
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret (optional)
        """
        self.provider_id = provider_id
        self.provider = provider
        self.access_token = access_token
        self.access_token_secret = access_token_secret

        # Generate an id for this auth based on the provider and the provider id.
        self.id = Auth.gen_id(provider, provider_id)

    def update_token(self, access_token, access_token_secret=None):
        """
        Updates token for an authentication.

        Enforcing that access tokens and their
        secrets must be updated in tandem.
        May need to revisit this decision later.

        Args:
            | access_token (str)        -- the access token
            | access_token_secret (str) -- the access token secret

        Raises:
            | Exception -- if this auth needs a secret and none was given
        """

        # If the auth has a token and no secret, just update the token.
        if self.access_token and self.access_token_secret is None:
            self.access_token = access_token

        # Otherwise, the auth has a token and a secret,
        # and a new secret must be present.
        elif access_token_secret is None:
            raise Exception('This authentication requires a token secret, which was not specified.')

        else:
            self.access_token = access_token
            self.access_token_secret = access_token_secret

    @property
    def access_token_secret(self):
        """
        The decrypted access token secret as text,
        or None if no secret is stored.
        """
        if self._access_token_secret is None:
            return None
        dec = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
        return dec.decrypt(self._access_token_secret).decode('utf-8')

    @access_token_secret.setter
    def access_token_secret(self, value):
        """
        Encrypt and store the secret. A value of None is ignored,
        so an existing secret is left untouched.
        """
        if value is not None:
            # The getter decodes the decrypted bytes as UTF-8, so encode
            # text the same way before handing it to the cipher, which
            # works on bytes. Bytes pass through unchanged.
            if not isinstance(value, bytes):
                value = value.encode('utf-8')
            enc = AES.new(current_app.config['AES_KEY'], AES.MODE_CFB, current_app.config['AES_IV'])
            self._access_token_secret = enc.encrypt(value)

    @staticmethod
    def for_provider(provider, provider_id):
        """
        Find an Auth instance by provider.

        Args:
            | provider (str)        -- the provider name, e.g. 'twitter'
            | provider_id (str)     -- the user id assigned by the provider
        """
        auth_id = Auth.gen_id(provider, provider_id)
        return Auth.query.get(auth_id)

    @staticmethod
    def gen_id(provider, provider_id):
        """
        Generates an id for an Auth from the provider name
        and the provider-assigned user id.
        """
        # NOTE(review): the builtin hash() of a str is salted per process
        # on Python 3.3+ unless PYTHONHASHSEED is fixed, so this id is not
        # reproducible across interpreter runs and `for_provider` may miss
        # rows written by another process. Plain concatenation also maps
        # ('ab', 'c') and ('a', 'bc') to the same id. Left unchanged here
        # because replacing it alters stored primary keys -- needs a
        # migration decision.
        return hash(provider + provider_id)
예제 #14
0
파일: user.py 프로젝트: publicscience/argos
from datetime import datetime
from Crypto.Cipher import AES

from argos.datastore import db, Model

from flask import current_app
from flask.ext.security import Security, UserMixin, RoleMixin

# Table connecting users and roles.
# No primary key or uniqueness constraint is declared on this or the
# two tables below, so as declared they do not forbid duplicate pairs.
roles_users = db.Table('roles_users',
        db.Column('user_id', db.Integer(), db.ForeignKey('user.id')),
        db.Column('role_id', db.Integer(), db.ForeignKey('role.id')))

# Table for users watching stories.
users_stories = db.Table('users_stories',
        db.Column('user_id', db.Integer(), db.ForeignKey('user.id')),
        db.Column('story_id', db.Integer(), db.ForeignKey('story.id')))

# Table for users bookmarking events.
users_events = db.Table('users_events',
        db.Column('user_id', db.Integer(), db.ForeignKey('user.id')),
        db.Column('event_id', db.Integer(), db.ForeignKey('event.id')))

# Table for users 

class AuthExistsForUserException(Exception):
    """
    Error type for an authentication that already exists for a user
    (going by its name; the raise sites are outside this snippet).
    """

class Role(Model, RoleMixin):
    """
    A user's Role
예제 #15
0
from argos.core.models.cluster import Clusterable
from argos.core.brain import vectorize, entities

from scipy.spatial.distance import jaccard

from math import isnan
from slugify import slugify

# Ignore the invalid numpy warning,
# which comes up when jaccard uses
# empty vectors.
import numpy
numpy.seterr(invalid='ignore')

# Association table between authors and articles.
# NOTE(review): the table is named 'authors', unlike the sibling
# 'articles_entities' naming below -- confirm this is intentional.
articles_authors = db.Table(
    'authors', db.Column('author_id', db.Integer, db.ForeignKey('author.id')),
    db.Column('article_id', db.Integer, db.ForeignKey('article.id')))

# Association table between entities and articles.
# No primary key or uniqueness constraint is declared here, so the
# table as declared does not forbid duplicate pairs.
articles_entities = db.Table(
    'articles_entities',
    db.Column('entity_slug', db.String, db.ForeignKey('entity.slug')),
    db.Column('article_id', db.Integer, db.ForeignKey('article.id')))


class Article(Clusterable):
    """
    An article.
    """
    __tablename__ = 'article'
    __entities__ = {'secondary': articles_entities, 'backref_name': 'articles'}
    vectors = db.Column(db.PickleType)
예제 #16
0
 def concept_slug(cls):
     """
     Build the `concept_slug` column: a string primary-key column
     referencing `concept.slug`, cascading on delete and update.
     """
     concept_fk = db.ForeignKey('concept.slug',
                                ondelete='CASCADE',
                                onupdate='CASCADE')
     return db.Column(db.String, concept_fk, primary_key=True)
예제 #17
0
from argos.util import storage

import galaxy as gx
from slugify import slugify
from datetime import datetime
from os.path import splitext
from sqlalchemy import event
from sqlalchemy.ext.declarative import declared_attr

from collections import Counter

# Association table between aliases and concepts.
# Both foreign keys cascade on delete/update of the referenced row.
# No primary key or uniqueness constraint is declared here, so the
# table as declared does not forbid duplicate pairs.
concepts_mentions = db.Table(
    'concepts_mentions',
    db.Column(
        'alias_id', db.Integer,
        db.ForeignKey('alias.id', ondelete='CASCADE', onupdate='CASCADE')),
    db.Column(
        'concept_slug', db.String,
        db.ForeignKey('concept.slug', ondelete='CASCADE', onupdate='CASCADE')))


class BaseConceptAssociation(Model):
    """
    Models which will be related to concepts must
    subclass this model and specify a backref name
    through a class property called `__backref__`
    and a foreign key property for the related model.

    Example::

        class ArticleConceptAssociation(BaseConceptAssociation):
예제 #18
0
class Article(Clusterable):
    """
    An article.
    """
    __tablename__ = 'article'
    __concepts__ = {
        'association_model': ArticleConceptAssociation,
        'backref_name': 'article'
    }
    __mentions__ = {'secondary': articles_mentions, 'backref_name': 'articles'}
    title = db.Column(db.Unicode)
    text = db.Column(db.UnicodeText)
    html = db.Column(db.UnicodeText)
    ext_url = db.Column(db.Unicode)
    image = db.Column(db.String)
    ignore = db.Column(db.Boolean, default=False)
    score = db.Column(db.Float, default=0.0)
    source_id = db.Column(db.Integer, db.ForeignKey('source.id'))
    feed_id = db.Column(db.Integer, db.ForeignKey('feed.id'))
    node_id = db.Column(db.Integer, unique=True, index=True)
    authors = db.relationship('Author',
                              secondary=articles_authors,
                              backref=db.backref('articles', lazy='dynamic'))

    # There are some articles which are just noise, and we want to ignore them using regexes for their titles.
    ignore_patterns = [
        # NYT country profiles
        re.compile(r'[A-Z].+\sprofile( - Overview)?')
    ]

    def __str__(self):
        return self.title

    def __repr__(self):
        return self.title

    def __init__(self, **kwargs):
        """
        Set every keyword argument as an attribute, extract concepts
        when text was provided, default the score to 0.0 and
        evaluate the ignore patterns against the title.
        """
        for key in kwargs:
            setattr(self, key, kwargs[key])

        if self.text is not None:
            self.conceptize()

        if self.score is None:
            self.score = 0.0

        self.check_ignored()

    def check_ignored(self):
        """
        Set `ignore` to whether the title matches (from its start)
        any of `ignore_patterns`, and return that value.

        An article without a title is never ignored.
        """
        title = self.title
        # `pattern.match(None)` raises TypeError, and `__init__` always
        # calls this method, so a missing title has to be handled here.
        self.ignore = title is not None and any(
            pattern.match(title) for pattern in self.ignore_patterns)
        return self.ignore

    def conceptize(self):
        """
        Process the article text for concepts,
        and add the appropriate mentions.
        """
        concepts = []
        for c_name in gx.concepts(self.text):
            # Search for the concept.
            uri = knowledge.uri_for_name(c_name)

            if uri:
                slug = uri.split('/')[-1]
            else:
                slug = slugify(c_name)
            c = Concept.query.get(slug)

            # If a concept is found...
            if c:
                # Add this name as a new alias, if necessary.
                alias = Alias.query.filter_by(name=c_name, concept=c).first()
                if not alias:
                    alias = Alias(c_name)
                    c.aliases.append(alias)
                # Avoid duplicate aliases.
                if alias not in self.mentions:
                    self.mentions.append(alias)

            # If one doesn't exist, create a new one.
            else:
                c = Concept(c_name)
                self.mentions.append(c.aliases[0])
                db.session.add(c)
                db.session.commit()

            concepts.append(c)

        # Score the concepts' importance: each concept's share
        # of all concept occurrences found in the text.
        total_found = len(concepts)
        counter = Counter(concepts)

        self.concept_associations = [
            ArticleConceptAssociation(concept, count / total_found)
            for concept, count in counter.items()
        ]

    @property
    def published(self):
        """
        The article's creation time as seconds relative to `epoch`.
        """
        # If no timezone is set, assume UTC.
        # super annoying and it's probably not a good guess but it's
        # all we got for now.
        # In production, we will be setting article publish times as utc when
        # we fetch them, so it should be less of a problem there.
        created_at = self.created_at
        if created_at.tzinfo is None:
            created_at = created_at.replace(tzinfo=pytz.UTC)
        # Binding `created_at` before the check keeps timezone-aware
        # timestamps working too, instead of raising UnboundLocalError.
        delta = created_at - epoch
        return delta.total_seconds()