Example #1
0
class TweetWord(models.Model):
    """A word occurring in messages, optionally scoped to a dataset.

    Several rows may share the same ``dataset`` and ``text``;
    :attr:`related_words` gathers those rows and :attr:`all_messages`
    collects the dataset messages linked to any of them.
    """

    # Owning dataset; nullable, so a word may exist without one.
    dataset = models.ForeignKey(
        Dataset,
        related_name="tweet_words",
        null=True,
        blank=True,
        default=None,
    )
    # Indexed text column; presumably the surface form before
    # normalisation -- confirm against the importer.
    original_text = base_models.Utf8CharField(
        max_length=100, db_index=True, blank=True, default="")
    # Short tag of at most 4 characters (presumably a part-of-speech tag).
    pos = models.CharField(max_length=4, null=True, blank=True, default="")
    # Indexed text used to match related words.
    text = base_models.Utf8CharField(
        max_length=100, db_index=True, blank=True, default="")
    # Messages this word row is attached to.
    messages = models.ManyToManyField(Message, related_name='tweet_words')

    def __repr__(self):
        """Represent the word by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Return the same value as :meth:`__repr__`."""
        return self.__repr__()

    @property
    def related_words(self):
        """Queryset of ``TweetWord`` rows with this dataset and text."""
        same_word = TweetWord.objects.filter(dataset=self.dataset,
                                             text=self.text)
        return same_word.all()

    @property
    def all_messages(self):
        """Messages of this word's dataset linked to any related word.

        The dataset's messages are filtered with the condition built by
        ``utils.levels_or`` over the ids of :attr:`related_words`.
        """
        dataset_messages = self.dataset.message_set.all()
        word_ids = map(lambda word: word.id, self.related_words)
        id_condition = utils.levels_or("tweet_words__id", word_ids)
        return dataset_messages.filter(id_condition)
Example #2
0
class Topic(models.Model):
    """A single topic belonging to a :class:`TopicModel`.

    Messages and words are attached through the ``MessageTopic`` and
    ``TopicWord`` intermediate models respectively.
    """

    # Parent topic model; reverse accessor is ``topics``.
    model = models.ForeignKey(TopicModel, related_name='topics')
    name = base_models.Utf8CharField(max_length=100)
    description = base_models.Utf8CharField(max_length=200)
    # Integer index of the topic (presumably its position within the
    # topic model -- confirm with the code that populates it).
    index = models.IntegerField()
    alpha = models.FloatField()

    messages = models.ManyToManyField(
        Message,
        through='MessageTopic',
        related_name='topics',
    )
    words = models.ManyToManyField(
        Word,
        through='TopicWord',
        related_name='topics',
    )
Example #3
0
class Word(models.Model):
    """An entry of a :class:`Dictionary`, linked to the messages using it.

    The link to messages goes through the ``MessageWord`` intermediate
    model.
    """

    # Owning dictionary; reverse accessor is ``words``.
    dictionary = models.ForeignKey(Dictionary, related_name='words')
    # Integer index of the word (presumably within its dictionary).
    index = models.IntegerField()
    text = base_models.Utf8CharField(max_length=100)
    document_frequency = models.IntegerField()

    messages = models.ManyToManyField(
        Message,
        through='MessageWord',
        related_name='words',
    )

    def __repr__(self):
        """Represent the word by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Return the same value as :meth:`__repr__`."""
        return self.__repr__()
Example #4
0
class Feature(models.Model):
    """A dictionary feature, either system-provided or added by a user.

    Messages are attached through the ``MessageFeature`` intermediate
    model.
    """

    dictionary = models.ForeignKey(Dictionary, related_name='features')
    index = models.IntegerField()
    text = base_models.Utf8CharField(max_length=150)
    document_frequency = models.IntegerField()

    messages = models.ManyToManyField(
        Message,
        through='MessageFeature',
        related_name='features',
    )
    source = models.ForeignKey(
        User, related_name="features", default=None, null=True)
    """The user who added this feature; None means it is a system feature"""
    origin = models.ForeignKey(
        Message, related_name="user_features", default=None, null=True)
    """The message from which this feature was added to the list"""

    # NOTE(review): newer Django versions reject combining
    # auto_now / auto_now_add with ``default`` -- confirm the target
    # Django version before upgrading.
    created_at = models.DateTimeField(auto_now_add=True, default=None)
    """The time the feature was created"""

    last_updated = models.DateTimeField(
        auto_now_add=True, auto_now=True, default=None)
    """The time the feature was last updated"""

    valid = models.BooleanField(default=True)
    """Whether this feature is valid (False means it has been removed)"""

    def __repr__(self):
        """Represent the feature by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Return the same value as :meth:`__repr__`."""
        return self.__repr__()

    def get_origin_message_code(self):
        """Return the code that ``source`` assigned to the origin message.

        Looks at the origin message's code assignments made by this
        feature's ``source`` that are user-labeled and valid, and returns
        the code of the first one. Returns None when the feature has no
        origin message or no such assignment exists.
        """
        if not self.origin:
            return None

        matching = self.origin.code_assignments.filter(
            source=self.source, is_user_labeled=True, valid=True)
        first_assignment = matching.first()
        if not first_assignment:
            return None
        return first_assignment.code
Example #5
0
class PrecalcCategoricalDistribution(models.Model):
    """A stored count for one level of one dimension, per dataset.

    Each row holds ``count`` for a (``dimension_key``, ``level``) pair;
    presumably these are precomputed so distributions need not be
    recalculated on demand -- confirm with the code that fills the table.
    """

    # Owning dataset; nullable. Reverse accessor is ``distributions``.
    dataset = models.ForeignKey(
        Dataset,
        related_name="distributions",
        null=True,
        blank=True,
        default=None,
    )
    dimension_key = models.CharField(
        db_index=True, max_length=64, blank=True, default="")
    level = base_models.Utf8CharField(
        db_index=True, max_length=128, blank=True, default="")
    count = models.IntegerField()

    class Meta:
        # Composite index on the pair, in addition to the per-column
        # indexes declared on the fields above.
        index_together = [["dimension_key", "level"]]
Example #6
0
class Code(models.Model):
    """A code of a message"""

    text = base_models.Utf8CharField(max_length=200)
    """The text of the code"""

    def __repr__(self):
        """Represent the code by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Return the same value as :meth:`__repr__`."""
        return self.__repr__()

    def get_definition(self, source):
        """Return the latest valid definition of this code by ``source``.

        :param source: the author whose definition is wanted; matched
            against the ``source`` field of the related definitions.
        :return: a dict with the keys ``code_id``, ``code_text``,
            ``source``, ``text`` and ``examples``, or None when ``source``
            has no valid definition for this code.
        """
        # ``definitions`` is a reverse relation declared outside this class.
        # One query with ``valid=True`` replaces the former unfiltered
        # ``exists()`` check: that check let sources with only invalid
        # definitions through, after which ``first()`` returned None and
        # the attribute access below raised AttributeError.
        definition = (self.definitions
                      .filter(source=source, valid=True)
                      .order_by("-last_updated")
                      .first())
        if definition is None:
            return None

        return {
            "code_id": self.id,
            "code_text": self.text,
            "source": source,
            "text": definition.text,
            "examples": definition.examples
        }
Example #7
0
class Person(models.Model):
    """
    A person who sends messages in a dataset.
    """
    class Meta:
        index_together = (('dataset', 'original_id')  # used by the importer
                          )

    dataset = models.ForeignKey(Dataset)
    """Which :class:`Dataset` this person belongs to"""

    original_id = models.BigIntegerField(null=True, blank=True, default=None)
    """An external id for the person, e.g. a user id from Twitter"""

    username = base_models.Utf8CharField(max_length=150,
                                         null=True,
                                         blank=True,
                                         default=None)
    """Username is a short system-y name."""

    full_name = base_models.Utf8CharField(max_length=250,
                                          null=True,
                                          blank=True,
                                          default=None)
    """Full name is a longer user-friendly name"""

    language = models.ForeignKey(Language, null=True, blank=True, default=None)
    """The person's primary :class:`Language`"""

    message_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of messages the person produced"""

    replied_to_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person's messages were replied to"""

    shared_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person's messages were shared or retweeted"""

    mentioned_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person was mentioned in other people's messages"""

    friend_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of people this user has connected to"""

    follower_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of people who have connected to this person"""

    profile_image_url = models.TextField(null=True, blank=True, default="")
    """The person's profile image url"""

    def __unicode__(self):
        """Return a display name for the person.

        ``username`` and ``full_name`` are both nullable, and returning
        None from ``__unicode__`` raises ``TypeError``. The username is
        therefore used when set, then the full name, then an empty string.
        """
        return self.username or self.full_name or u""

    @property
    def profile_image_processed_url(self):
        """Return the profile image URL, or a local file name for it.

        When the dataset reports ``has_prefetched_images`` and the stored
        URL ends in ``/<name>.<suffix>``, the result is
        ``profile_<original_id>.<suffix>``. In every other case the stored
        ``profile_image_url`` is returned unchanged, which includes None
        and the empty string.
        """
        url = self.profile_image_url
        # Truthiness check: the field is nullable, and a None value would
        # make the regex search below raise TypeError.
        if url and self.dataset.has_prefetched_images:
            # Raw string keeps the regex escapes out of Python's own
            # string-escape processing; the pattern itself is unchanged.
            pattern = re.compile(r'/[_\.\-\w\d]+\.([\w]+)$')
            results = pattern.search(url)
            if results:
                suffix = results.group(1)
                url = "profile_" + str(self.original_id) + "." + suffix

        return url
Example #8
0
class Hashtag(models.Model):
    """A hashtag in a message.

    The hashtag text is stored in an indexed column of at most 100
    characters.
    """

    text = base_models.Utf8CharField(max_length=100, db_index=True)
    """The text of the hashtag, without the hash"""