Beispiel #1
0
class Campaign(so.SQLObject):
    """
    Model a Campaign, a label which can be attached to Tweets.

    Tweets which were added to the DB because they matched the same
    campaign, such as a search topic, are grouped under it. See the
    docs/models.md document.
    """
    class sqlmeta:
        # Return campaigns sorted by name unless a query says otherwise.
        defaultOrder = "name"

    # Campaign name, in any case and optionally with spaces. Declared as an
    # alternate ID, which is what provides the `byName` lookup used below.
    name = so.StringCol(alternateID=True, length=50)

    # Query string for a Twitter API search, run manually or on a schedule.
    # Left empty for campaigns which only act as labels and are not searches.
    searchQuery = so.StringCol(default=None)

    # When the record was created; defaults to the current time.
    createdAt = so.DateTimeCol(notNull=True, default=so.DateTimeCol.now)

    # Tweet objects assigned to the Campaign, through the tweet_campaign
    # table (which this class does not create).
    tweets = so.SQLRelatedJoin(
        "Tweet",
        intermediateTable="tweet_campaign",
        createRelatedTable=False,
    )

    @classmethod
    def getOrCreate(cls, campaignName, query=None):
        """
        Fetch the campaign with the given name, creating it if missing.

        :param campaignName: name to look up, and to create with if needed.
        :param query: search query stored on a newly created campaign. May
            be left as None, since a campaign can be purely a grouping label
            (such as one used by a utility) rather than a search.

        :return: the existing or newly created Campaign object.
        """
        try:
            campaign = cls.byName(campaignName)
        except SQLObjectNotFound:
            campaign = cls(name=campaignName, searchQuery=query)

        return campaign

    @classmethod
    def getOrRaise(cls, campaignName):
        """
        Fetch the campaign with the given name, or fail with instructions.

        :param campaignName: name of the campaign to look up.

        :return: the matching Campaign object.
        :raises SQLObjectNotFound: if no campaign has that name. The same
            exception type as the lookup error is raised, with a message
            telling the user how to create the campaign.
        """
        try:
            return cls.byName(campaignName)
        except SQLObjectNotFound as e:
            message = (
                "Use the campaign manager to create the Campaign"
                " as name and search query. Name not found: {!r}"
            ).format(campaignName)
            raise type(e)(message)
Beispiel #2
0
class Label(so.SQLObject):
    """
    Model a label which pages can be assigned to.

    Labels are flat: unlike a folder, a label has no place in a tree
    hierarchy, so its name has to be unique for it to be looked up properly.
    """

    # Descriptive name of the label. Declared as an alternate ID so that it
    # is unique.
    name = so.UnicodeCol(alternateID=True)

    # Pages assigned to the label, through the page_label table (which this
    # class does not create).
    pages = so.SQLRelatedJoin(
        "Page",
        intermediateTable="page_label",
        createRelatedTable=False,
    )
Beispiel #3
0
class Page(so.SQLObject):
    """
    Model a URI for a webpage on the internet.

    Do not worry about duplicate pairs of domain and path, since we want
    to allow those to occur on an import and to clean them up later.

    A page may have a null Folder (as unsorted), though a folder must always
    have a parent folder, even if the top folder is "root".
    """

    # The host website for the page.
    # TODO: Ensure this is always converted lowercase rather than raising
    # an error.
    domain = so.ForeignKey('Domain', notNull=True)

    # The location of the webpage relative to the domain.
    # TODO: Should this start with forwardslash? Check what happens when
    # splitting and joining.
    # TODO: Create custom validator?
    path = so.UnicodeCol(notNull=True)

    # Webpage title, usually taken from the metadata of the HTML head section.
    title = so.UnicodeCol(default=None)

    # The date and time when the record was created. Defaults to the
    # current time.
    created_at = so.DateTimeCol(notNull=True, default=so.DateTimeCol.now)

    # Optional preview image for the link, scraped from the metadata.
    image_url = so.UnicodeCol(default=None)

    # Optional free-text description of the page.
    description = so.UnicodeCol(default=None)

    # The folder this link is placed into. If null then the link must still
    # be sorted. Domain and path pairs must be unique in a folder.
    folder = so.ForeignKey('Folder')
    unique_idx = so.DatabaseIndex(domain, path, folder, unique=True)

    # Where the page record came from.
    # NOTE(review): the Source model is not visible here - confirm meaning.
    source = so.ForeignKey('Source', notNull=True)

    # Link to labels which this page is assigned to. The join is resolved by
    # the related model's class name, which is the singular 'Label'; the
    # previous value 'Labels' does not name a model class.
    labels = so.SQLRelatedJoin('Label',
                               intermediateTable='page_label',
                               createRelatedTable=False)

    def get_url(self):
        """
        Build the page's URL from its domain and path.

        The related domain's `value` attribute and this page's path are
        concatenated as-is; no separator is inserted between them.

        :return: the combined URL as a string.
        """
        return "".join((self.domain.value, self.path))
Beispiel #4
0
class Category(so.SQLObject):
    """
    Model a Category, a grouping which Profiles can be assigned to.

    Similar profiles are collected under one category. See the
    docs/models.md document.
    """
    class sqlmeta:
        # Return categories sorted by name unless a query says otherwise.
        defaultOrder = "name"

    # Category name, in any case and optionally with spaces. Declared as an
    # alternate ID so that it is unique.
    name = so.StringCol(alternateID=True, length=50)

    # When the record was created; defaults to the current time.
    createdAt = so.DateTimeCol(notNull=True, default=so.DateTimeCol.now)

    # Profile objects assigned to the Category, through the profile_category
    # table (which this class does not create).
    profiles = so.SQLRelatedJoin(
        "Profile",
        intermediateTable="profile_category",
        createRelatedTable=False,
    )
Beispiel #5
0
class Profile(so.SQLObject):
    """
    Models a user profile on Twitter.

    Note that URL columns are named as 'Url', since SQLObject converts
    'imageURL' to db column named 'image_ur_l'.

    Twitter screen name and username rules:
        https://help.twitter.com/en/managing-your-account/twitter-username-rules
    We use slightly higher values than the ones there, to be safe.

    Notes on screen name:
    - This should not have unique restriction as users can edit their
        screen name so others can take an older screen name. Or someone
        could delete and recreate their account.
    - Twitter itself enforces uniqueness across case.
    """

    # Profile's ID (integer), as assigned by Twitter when the Profile was
    # created. This is a global ID, rather than an ID specific to our local db.
    guid = so.IntCol(alternateID=True)

    # Profile screen name.
    screenName = so.StringCol(notNull=True, length=30)

    # Profile display Name.
    name = so.StringCol(notNull=True, length=60)

    # Description, as set in profile's bio.
    description = so.StringCol(default=None)

    # Location, as set in profile's bio.
    location = so.StringCol(default=None)

    # Link to the profile's image online. This will only be thumbnail size.
    imageUrl = so.StringCol(default=None, validator=URL)

    # Count of profile's followers.
    followersCount = so.IntCol(notNull=True)

    # Count of profile's statuses (tweets) posted by this profile.
    statusesCount = so.IntCol(notNull=True)

    # Profile's verified status.
    verified = so.BoolCol(notNull=True, default=False)

    # Join the Profile with its created tweets in the Tweet table.
    tweets = so.MultipleJoin("Tweet")

    # Date and time when follower and status counts were last updated.
    modified = so.DateTimeCol(notNull=True, default=so.DateTimeCol.now)
    modifiedIdx = so.DatabaseIndex(modified)

    # Get Category objects which this Profile has been assigned to, if any.
    categories = so.SQLRelatedJoin("Category",
                                   intermediateTable="profile_category",
                                   createRelatedTable=False)

    def set(self, **kwargs):
        """
        Override the update hook to update the modified field if necessary.

        When either the followers count or the statuses count is being
        written and the caller did not supply `modified` explicitly, the
        `modified` value is set to the current time.

        :param kwargs: column names and the new values to store.
        """
        if ("followersCount" in kwargs
                or "statusesCount" in kwargs) and "modified" not in kwargs:
            kwargs["modified"] = so.DateTimeCol.now()
        super(Profile, self).set(**kwargs)

    def getFlatDescription(self):
        """
        Return the description with newline characters replaced with spaces.

        :return: flattened description as a string, or None if the
            description was not set.
        """
        if self.description is not None:
            return lib.text_handling.flattenText(self.description)

        return None

    def getProfileUrl(self):
        """
        Get link to the profile's page online.

        :return: Twitter profile's URL, as a string.
        """
        return "https://twitter.com/{0}".format(self.screenName)

    def getLargeImageUrl(self):
        """
        Get link to a large version profile's image, based on thumbnail URL.

        The image URL comes from the API as '..._normal.jpeg', but
        from API calls on loading a twitter.com page, it is possible to
        see that the image media server allows variations of the last part,
        to return a large image. Such as
         - '..._bigger.jpeg' (which is not much bigger than the normal
                thumbnail)
         - '..._400x400.jpeg' (which is much bigger).

        Only the final '_normal' in the URL is swapped, so a file name which
        happens to contain '_normal' earlier on is left intact. A URL with
        no '_normal' in it is returned unchanged.

        :return: image URL using 400x400 size parameter, or None if value
            was not set.
        """
        if not self.imageUrl:
            return None

        # Split once from the right so that only the size suffix is replaced;
        # a plain replace would rewrite every occurrence and break the link.
        return "_400x400".join(self.imageUrl.rsplit("_normal", 1))

    def prettyPrint(self):
        """
        Method to print the attributes of the Profile instance neatly.

        :return: dictionary of data which was printed.
        """
        output = """\
Screen name    : @{screenName}
Name           : {name}
Verified       : {verified}
Followers      : {followers:,d}
Statuses       : {statuses:,d}
DB tweets      : {tweetCount}
Description    : {description}
Profile URL    : {url}
Image URL      : {imageUrl}
Stats modified : {statsModified}
        """
        data = dict(
            screenName=self.screenName,
            name=self.name,
            verified=self.verified,
            followers=self.followersCount,
            statuses=self.statusesCount,
            # Number of this profile's tweets stored in the local db.
            tweetCount=len(self.tweets),
            description=self.getFlatDescription(),
            url=self.getProfileUrl(),
            imageUrl=self.getLargeImageUrl(),
            statsModified=self.modified,
        )
        print(output.format(**data))

        return data
Beispiel #6
0
class Tweet(so.SQLObject):
    """
    Models a tweet on Twitter.

    A Tweet inserted in our db is always expected to have its author's
    profile in the Profile table. For a reply, the target Profile and the
    original Tweet are referenced as GUID integers only, as those objects
    are unlikely to be stored in our db. Use the `.getInReplyToTweet` and
    `.getInReplyToProfile` methods to see if they exist in the db, otherwise
    use the GUIDs to look up the data from the Twitter API and then store
    them locally as db records.

    A `setProfileByGuid` method could be implemented to set the author
    foreign key from a given GUID, but that would require doing a search
    each time. So, when creating one or more Tweet objects for one Profile,
    it is preferable to get the Profile object's ID once and then repeatedly
    pass that in as an argument for each Tweet object created for it.

    For ordering, the '-guid' syntax is preferred here, since 'guid DESC'
    results in an error when getting tweets of a Profile object, even though
    doing a query on the Tweet class itself is fine.
        `AttributeError: 'Tweet' object has no attribute 'guid DESC'`
    The error is also raised for multiple names e.g. '-guid, message'.
    """
    class sqlmeta:
        # Show recent Tweets (with higher GUID values) first.
        defaultOrder = "-guid"

    # Tweet ID (integer), as assigned by Twitter when the Tweet was posted.
    # This is a global ID, rather than specific to our local db.
    guid = so.IntCol(alternateID=True)

    # The Tweet's author in the Profile table. The Tweet is deleted if the
    # Profile is deleted.
    profile = so.ForeignKey("Profile", notNull=True, cascade=True)
    profileIdx = so.DatabaseIndex(profile)

    # Date and time the tweet was posted.
    createdAt = so.DateTimeCol(notNull=True)
    createdAtIdx = so.DatabaseIndex(createdAt)

    # Tweet message text. Length is not validated since expanded tweets can
    # be longer than the standard 280 (previously 140) characters.
    message = so.StringCol(notNull=True)

    # Count of favorites on this Tweet.
    favoriteCount = so.IntCol(notNull=True)

    # Count of retweets of this Tweet.
    retweetCount = so.IntCol(notNull=True)

    # For a reply, the GUID of the Tweet which the reply is directed at
    # (from reply_to_status_id field). That Tweet does not need to be in
    # the local db.
    inReplyToTweetGuid = so.IntCol(default=None)

    # For a reply, the GUID of the Profile which the reply is directed at
    # (from reply_to_user_id field). That Profile does not need to be in
    # the local db.
    inReplyToProfileGuid = so.IntCol(default=None)

    # Date and time when favorite and retweet counts were last updated.
    modified = so.DateTimeCol(notNull=True, default=so.DateTimeCol.now)
    modifiedIdx = so.DatabaseIndex(modified)

    # Campaign objects which this Tweet has been assigned to, if any, through
    # the tweet_campaign table (which this class does not create).
    campaigns = so.SQLRelatedJoin(
        "Campaign",
        intermediateTable="tweet_campaign",
        createRelatedTable=False,
    )

    def set(self, **kwargs):
        """
        Override the update hook to keep the modified field current.

        When the favorite count or the retweet count is being written and
        the caller did not supply `modified` explicitly, `modified` is set
        to the current time.

        :param kwargs: column names and the new values to store.
        """
        touchesStats = "favoriteCount" in kwargs or "retweetCount" in kwargs
        if touchesStats and "modified" not in kwargs:
            kwargs["modified"] = so.DateTimeCol.now()

        super(Tweet, self).set(**kwargs)

    def isRT(self):
        """
        Return True if the message text starts with the "RT " prefix.
        """
        return self.message.startswith("RT ")

    def isReply(self):
        """
        Return True if a reply-to Profile GUID is set on this Tweet.
        """
        return self.inReplyToProfileGuid is not None

    def getFlatMessage(self):
        """
        Return the message with newline characters replaced with spaces.
        """
        return lib.text_handling.flattenText(self.message)

    def getInReplyToTweet(self):
        """
        If this Tweet is a reply, get the original Tweet it was directed at.

        :return: single Tweet object. Return None if no reply-to Tweet GUID
            is set.
        :raises SQLObjectNotFound: if the original Tweet is not in the
            local db.
        """
        targetGuid = self.inReplyToTweetGuid
        if not targetGuid:
            return None

        try:
            return Tweet.byGuid(targetGuid)
        except SQLObjectNotFound as e:
            message = "Could not find Tweet in db with GUID: {0}".format(
                targetGuid)
            raise type(e)(message)

    def getInReplyToProfile(self):
        """
        If this Tweet is a reply, get the Profile which it was directed at.

        :return: single Profile object. Return None if no reply-to Profile
            GUID is set.
        :raises SQLObjectNotFound: if the target Profile is not in the
            local db.
        """
        targetGuid = self.inReplyToProfileGuid
        if not targetGuid:
            return None

        try:
            return Profile.byGuid(targetGuid)
        except SQLObjectNotFound as e:
            message = "Could not find Profile in db with GUID: {0}".format(
                targetGuid)
            raise type(e)(message)

    def getTweetURL(self):
        """
        Return URL for the tweet as a string, built from the tweet author's
        screen name and the tweet's GUID.
        """
        template = "https://twitter.com/{screenName}/status/{tweetID}"

        return template.format(screenName=self.profile.screenName,
                               tweetID=self.guid)

    def prettyPrint(self):
        """
        Print the attributes of the Tweet instance neatly.

        :return: dictionary of data which was printed.
        """
        output = """\
Author            : @{screenName} - {name} - {followers:,d} followers
Created at        : {createdAt}
Message           : {message}
Favorites         : {favoriteCount:,d}
Retweets          : {retweetCount:,d}
Reply To User ID  : {replyProf}
Reply To Tweet ID : {replyTweet}
URL               : {url}
Stats modified    : {statsModified}
        """
        author = self.profile
        data = {
            "screenName": author.screenName,
            "createdAt": self.createdAt,
            "name": author.name,
            "followers": author.followersCount,
            "message": self.getFlatMessage(),
            "favoriteCount": self.favoriteCount,
            "retweetCount": self.retweetCount,
            "replyProf": self.inReplyToProfileGuid,
            "replyTweet": self.inReplyToTweetGuid,
            "url": self.getTweetURL(),
            "statsModified": self.modified,
        }
        print(output.format(**data))

        return data

    def report(self):
        """
        Return Tweet and Profile data as dict for writing a CSV report.

        Boolean values are written as "Y" or "N" and the creation time is
        converted to a string.
        """
        def yesNo(flag):
            return "Y" if flag else "N"

        author = self.profile
        row = dict([
            ("Screen name", author.screenName),
            ("Followers", author.followersCount),
            ("Tweet URL", self.getTweetURL()),
            ("Tweet ID", self.guid),
            ("Tweeted at", str(self.createdAt)),
            ("Is reply", yesNo(self.isReply())),
            ("Is RT", yesNo(self.isRT())),
            ("Message", self.message),
            ("Favs", self.favoriteCount),
            ("RTs", self.retweetCount),
        ])

        return row