예제 #1
0
class ArticleTopic(Base):

    """ Association row that attaches one topic to one article.

        Each row pairs an articles.id with a topics.id. The pair forms the
        composite primary key, so the same topic can be attached to the
        same article at most once. Both foreign keys cascade on update
        and on delete. """

    __tablename__ = "atopics"

    # Foreign key to the article that has the topic
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # Foreign key to the topic assigned to the article
    topic_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("topics.id", onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # The Article this row belongs to; the backref also installs an
    # 'atopics' attribute on Article
    article = relationship("Article", backref=backref("atopics"))
    # The Topic this row belongs to; the backref also installs an
    # 'atopics' attribute on Topic
    topic = relationship("Topic", backref=backref("atopics"))

    __table_args__ = (
        PrimaryKeyConstraint("article_id", "topic_id", name="atopics_pkey"),
    )

    def __repr__(self):
        """ Return a debug string that identifies the row by its composite key """
        return "ArticleTopic(article_id='{0}', topic_id='{1}')".format(
            self.article_id, self.topic_id
        )
예제 #2
0
class Feedback(Base):
    """ A single submission of the feedback form. """

    __tablename__ = "feedback"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        primary_key=True,
        unique=True,
        nullable=False,
        index=True,
        server_default=text("uuid_generate_v1()"),
    )

    # When the feedback was submitted (required)
    timestamp = Column(DateTime, nullable=False, index=True)

    # What the feedback is about (e.g. Embla/Netskrafl/etc.); optional
    topic = Column(String, nullable=True, index=True)

    # Name given by the submitter; optional
    name = Column(String, nullable=True, index=True)

    # Email address given by the submitter; optional
    email = Column(String, nullable=True, index=True)

    # Free-text comment; optional and not indexed
    comment = Column(String, nullable=True, index=False)

    def __repr__(self):
        """ Return a debug string showing name, email, topic and comment """
        template = "Feedback(name='{0}', email='{1}', topic='{2}', comment='{3}')"
        return template.format(self.name, self.email, self.topic, self.comment)
예제 #3
0
class Topic(Base):

    """ A topic that articles can be assigned to """

    __tablename__ = "topics"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # Human-readable topic name (required, indexed, at most 128 characters)
    name = Column(String(128), index=True, nullable=False)

    # Short identifier for the topic, such as 'sport', 'business'...
    # It must be usable as a CSS class name.
    identifier = Column(String(32), nullable=False)

    # Keywords that define the topic, in the form word1/cat word2/cat...
    keywords = Column(String, nullable=False)

    # The vector associated with the topic, stored as a JSON string
    vector = Column(String)  # Is initially NULL

    # The cosine distance threshold to apply for this topic
    threshold = Column(Float)

    def __repr__(self):
        """ Return a debug string showing the topic name """
        template = "Topic(name='{0}')"
        return template.format(self.name)
예제 #4
0
class Word(Base):

    """ A (stem, category) pair counted within a single article """

    __tablename__ = "words"

    # Maximum length of the stem column, in characters
    MAX_WORD_LEN = 64

    # The article the word occurs in; cascades on update and delete
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )

    # Stem of the word
    stem = Column(String(MAX_WORD_LEN), nullable=False, index=True)

    # Category of the word
    cat = Column(String(16), nullable=False, index=True)

    # Number of occurrences
    cnt = Column(Integer, nullable=False)

    # The Article this word belongs to; the backref also installs a
    # 'words' attribute on Article
    article = relationship("Article", backref=backref("words"))

    # One row per (article, stem, category) combination
    __table_args__ = (
        PrimaryKeyConstraint("article_id", "stem", "cat", name="words_pkey"),
    )

    def __repr__(self):
        """ Return a debug string showing stem, category and count """
        template = "Word(stem='{0}', cat='{1}', cnt='{2}')"
        return template.format(self.stem, self.cat, self.cnt)
예제 #5
0
파일: scraperdb.py 프로젝트: busla/Reynir
class Word(Base):

    """ A (stem, category) pair counted within a single article """

    __tablename__ = "words"

    # The article the word occurs in; cascades on update and delete
    article_id = Column(
        psql_UUID(as_uuid=False),
        ForeignKey("articles.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )

    # Stem of the word (at most 64 characters)
    stem = Column(String(64), nullable=False, index=True)

    # Category of the word
    cat = Column(String(16), nullable=False, index=True)

    # Number of occurrences
    cnt = Column(Integer, nullable=False)

    # The Article this word belongs to; the backref also installs a
    # 'words' attribute on Article
    article = relationship("Article", backref=backref("words"))

    # One row per (article, stem, category) combination
    __table_args__ = (
        PrimaryKeyConstraint("article_id", "stem", "cat", name="words_pkey"),
    )

    def __repr__(self):
        """ Return a debug string showing stem, category and count """
        template = "Word(stem='{0}', cat='{1}', cnt='{2}')"
        return template.format(self.stem, self.cat, self.cnt)

    @classmethod
    def table(cls):
        """ Return the __table__ attribute of this mapped class """
        return cls.__table__
예제 #6
0
파일: models.py 프로젝트: Loknar/Greynir
class Location(Base):
    """ A location, optionally linked to the article it was found in """

    __tablename__ = "locations"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        primary_key=True,
        unique=True,
        nullable=False,
        index=True,
        server_default=text("uuid_generate_v1()"),
    )

    # URL of the associated article, if any
    article_url = Column(
        String,
        # Deleting the article keeps the location and sets this column to NULL
        ForeignKey("articles.url", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
        index=True,
    )

    # Name of the location
    name = Column(String, index=True)

    # Kind of location (e.g. 'address', 'street', 'country', 'region', 'placename')
    kind = Column(String(16), index=True)

    # Country code (ISO 3166-1 alpha-2, e.g. 'IS')
    country = Column(String(2))

    # Continent ISO code (e.g. 'EU')
    continent = Column(String(2))

    # Coordinates (WGS84)
    latitude = Column(Float)
    longitude = Column(Float)

    # Additional data, stored as JSONB
    data = Column(JSONB)

    # Timestamp of this entry
    timestamp = Column(DateTime)

    # The Article this location belongs to; the backref also installs a
    # 'locations' attribute on Article, ordered by location name
    article = relationship(
        "Article",
        backref=backref("locations", order_by=name),
    )

    # A (name, kind, article_url) combination may appear only once
    __table_args__ = (UniqueConstraint("name", "kind", "article_url"), )

    def __repr__(self):
        """ Return a debug string showing id, name, kind and country """
        template = "Location(id='{0}', name='{1}', kind='{2}', country='{3}')"
        return template.format(self.id, self.name, self.kind, self.country)

    @classmethod
    def table(cls):
        """ Return the __table__ attribute of this mapped class """
        return cls.__table__
예제 #7
0
class QueryLog(Base):

    """ Represents a fully anonymized, logged query and its answer.

        The columns here are a subset of the information kept about a query:
        this table has no columns for client identity, client location or
        remote address. """

    __tablename__ = "querylog"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # See the Query class for documentation of these fields
    timestamp = Column(DateTime, index=True, nullable=False)

    interpretations = Column(JSONB, nullable=True)

    question = Column(String, index=True, nullable=False)

    bquestion = Column(String, index=False, nullable=True)

    answer = Column(String, index=False, nullable=True)

    voice = Column(String, index=False, nullable=True)

    qtype = Column(String(80), index=True, nullable=True)

    key = Column(String(256), index=True, nullable=True)

    error = Column(String(256), nullable=True)

    @classmethod
    def from_Query(cls, q: Query) -> "QueryLog":
        """ Create a QueryLog object from a Query object.

            Only the timestamp, interpretations, question, bquestion,
            answer, voice, qtype, key and error attributes of q are
            copied; no other attribute of q is read.

            This is a classmethod so that the instance is built from the
            class it is called on rather than a hard-coded class name.
            It is still called as QueryLog.from_Query(q). """
        return cls(
            timestamp=q.timestamp,
            interpretations=q.interpretations,
            question=q.question,
            bquestion=q.bquestion,
            answer=q.answer,
            voice=q.voice,
            qtype=q.qtype,
            key=q.key,
            error=q.error,
        )

    def __repr__(self):
        """ Return a debug string showing the question and the answer """
        return "QueryLog(question='{0}', answer='{1}')".format(
            self.question, self.answer
        )
예제 #8
0
class Correction(Base):

    """ User feedback on an annotation of a sentence """

    __tablename__ = "corrections"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # NOTE: the cast() wrappers below only affect static type checking;
    # at runtime typing.cast returns the Column object unchanged.

    # When this entry was made
    timestamp = cast(datetime, Column(DateTime, index=True, nullable=False))

    # The original sentence being annotated
    sentence = cast(str, Column(String, nullable=False))

    # Code of the annotation
    code = cast(str, Column(String(32), index=True, nullable=False))

    # Text of the annotation
    annotation = cast(str, Column(String, nullable=False))

    # Span of the annotation (start and end)
    start = cast(int, Column(Integer, nullable=False))
    end = cast(int, Column(Integer, nullable=False))

    # The correction itself
    correction = cast(str, Column(String, nullable=False))

    # Feedback given by the user
    feedback = cast(str, Column(String(32), index=True, nullable=False))

    # Text describing the reason (nullable by default)
    reason = cast(str, Column(String(32), index=True))

    def __repr__(self) -> str:
        """ Return a debug string showing id, sentence, code, annotation and feedback """
        template = "Correction(id='{0}', sent='{1}', code='{2}', annotation='{3}', feedback='{4}')"
        return template.format(
            self.id, self.sentence, self.code, self.annotation, self.feedback
        )
예제 #9
0
class Query(Base):

    """ Represents a logged incoming query with its answer.

        Besides the plain columns, the class exposes three hybrid
        properties (question_lc, answer_lc, voice_lc) that give the
        lower-cased value on an instance and a CaseInsensitiveComparator
        over the underlying column when accessed on the class. """

    __tablename__ = "queries"

    # Primary key: a UUID whose default value is generated by the
    # database server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        index=True,
        nullable=False,
        unique=True,
        primary_key=True,
        server_default=text("uuid_generate_v1()"),
    )

    # Timestamp of the incoming query
    timestamp = Column(DateTime, index=True, nullable=False)

    # Interpretations
    # JSON array containing list of possible interpretations
    # provided by a speech-to-text engine.
    interpretations = Column(JSONB, nullable=True)

    # Question
    question = Column(String, index=True, nullable=False)

    @hybrid_property
    def question_lc(self):
        """ The question in lower case (instance-level value) """
        return self.question.lower()

    # pylint: disable=no-self-argument
    @question_lc.comparator  # type: ignore
    def question_lc(cls):
        """ Class-level comparator over the question column """
        return CaseInsensitiveComparator(cls.question)

    # Beautified question
    bquestion = Column(String, index=False, nullable=True)

    # Answer
    answer = Column(String, index=False, nullable=True)

    @hybrid_property
    def answer_lc(self):
        """ The answer in lower case, or None if there is no answer """
        # The answer column is nullable, so calling .lower() directly
        # would raise AttributeError for a query without an answer
        value = self.answer
        return None if value is None else value.lower()

    # pylint: disable=no-self-argument
    @answer_lc.comparator  # type: ignore
    def answer_lc(cls):
        """ Class-level comparator over the answer column """
        return CaseInsensitiveComparator(cls.answer)

    # Voice answer
    voice = Column(String, index=False, nullable=True)

    @hybrid_property
    def voice_lc(self):
        """ The voice answer in lower case, or None if there is none """
        # The voice column is nullable, so calling .lower() directly
        # would raise AttributeError for a query without a voice answer
        value = self.voice
        return None if value is None else value.lower()

    # pylint: disable=no-self-argument
    @voice_lc.comparator  # type: ignore
    def voice_lc(cls):
        """ Class-level comparator over the voice column """
        return CaseInsensitiveComparator(cls.voice)

    # Error code
    error = Column(String(256), nullable=True)

    # When does this answer expire, for caching purposes?
    # NULL=immediately
    expires = Column(DateTime, index=True, nullable=True)

    # The query type, NULL if not able to process
    qtype = Column(String(80), index=True, nullable=True)

    # The query key, NULL if not able to process or not applicable
    key = Column(String(256), index=True, nullable=True)

    # Client type
    # Either "www" (web interface), "ios" (iOS) or "android" (Android)
    client_type = Column(String(80), index=True, nullable=True)

    # Client version
    client_version = Column(String(10), nullable=True)

    # Client identifier, if applicable
    # If web client, this is the HTTP client user agent
    # On iOS and Android, this is a unique device UUID string
    client_id = Column(String(256), index=True, nullable=True)

    # Client location coordinates (WGS84)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)

    # Client IP address
    remote_addr = Column(INET, nullable=True)

    # Additional context used to answer the query
    context = Column(JSONB, nullable=True)

    # Add an index on the question in lower case
    question_lc_index = Index("ix_queries_question_lc", func.lower(question))

    # !!! The following indices don't work since answers can become
    # !!! very long (thousands of characters) and PostgreSQL has a
    # !!! limit on index entry size vs. its page size.

    # Add an index on the answer in lower case
    # answer_lc_index = Index('ix_queries_answer_lc', func.lower(answer))

    # Add an index on the voice answer in lower case
    # voice_lc_index = Index('ix_queries_voice_lc', func.lower(voice))

    def __repr__(self):
        """ Return a debug string showing the question and the answer """
        return "Query(question='{0}', answer='{1}')".format(self.question, self.answer)
예제 #10
0
class Article(Base):

    """ An article belonging to one of the roots, either waiting
        to be scraped or already scraped """

    __tablename__ = "articles"

    # The primary key is the URL of the article
    url = Column(String, primary_key=True)

    # Unique UUID whose default value is generated by the database
    # server via uuid_generate_v1()
    id = Column(
        psql_UUID(as_uuid=False),
        unique=True,
        nullable=False,
        index=True,
        server_default=text("uuid_generate_v1()"),
    )

    # The root this article belongs to
    root_id = Column(
        Integer,
        # Deleting the root keeps its articles and sets this column to NULL
        ForeignKey("roots.id", ondelete="SET NULL", onupdate="CASCADE"),
    )

    # Heading of the article, if known
    heading = Column(String)
    # Author of the article, if known
    author = Column(String)
    # Time stamp of the article, if known
    timestamp = Column(DateTime, index=True)

    # Authority of this article, 1.0 = most authoritative, 0.0 = least authoritative
    authority = Column(Float)
    # When this article was last scraped
    scraped = Column(DateTime, index=True)
    # When this article was last parsed
    parsed = Column(DateTime, index=True)
    # When this article was last processed
    processed = Column(DateTime, index=True)
    # When this article was last indexed
    indexed = Column(DateTime, index=True)
    # Module used for scraping
    scr_module = Column(String(80))
    # Class within that module used for scraping
    scr_class = Column(String(80))
    # Version of the scraper class
    scr_version = Column(String(16))
    # Version of parser/grammar/config
    parser_version = Column(String(64))
    # Statistics from parsing
    num_sentences = Column(Integer)
    num_parsed = Column(Integer)
    ambiguity = Column(Float)

    # HTML obtained in the last scrape
    html = Column(String)
    # Parse tree obtained in the last parse
    tree = Column(String)
    # Tokens of the article, as a JSON string
    tokens = Column(String)
    # Topic vector of the article: an array of floats, as a JSON string
    topic_vector = Column(String)

    # The Root this article belongs to, joined through root_id; the
    # backref also installs an 'articles' attribute on Root, ordered by URL
    root = relationship(
        "Root",
        backref=backref("articles", order_by=url),
        foreign_keys="Article.root_id",
    )

    def __repr__(self):
        """ Return a debug string showing url, heading and last scrape time """
        template = "Article(url='{0}', heading='{1}', scraped={2})"
        return template.format(self.url, self.heading, self.scraped)