class TweetWord(models.Model):
    """A word record tied to a dataset and to the messages it is linked with."""

    # Owning dataset; optional (nullable).
    dataset = models.ForeignKey(
        Dataset,
        related_name="tweet_words",
        null=True,
        blank=True,
        default=None,
    )
    # Indexed text column; presumably the surface form as it appeared — TODO confirm.
    original_text = base_models.Utf8CharField(
        max_length=100,
        db_index=True,
        blank=True,
        default="",
    )
    # Short tag of up to 4 characters; presumably a part-of-speech label — TODO confirm.
    pos = models.CharField(max_length=4, null=True, blank=True, default="")
    # Indexed text column; this is the value used to group "related" words below.
    text = base_models.Utf8CharField(
        max_length=100,
        db_index=True,
        blank=True,
        default="",
    )
    # Messages linked to this word (reverse accessor on Message: ``tweet_words``).
    messages = models.ManyToManyField(Message, related_name='tweet_words')

    def __repr__(self):
        """Represent the word by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Same value as :meth:`__repr__`."""
        return self.__repr__()

    @property
    def related_words(self):
        """Queryset of TweetWord rows sharing this row's dataset and text."""
        same_text = TweetWord.objects.filter(dataset=self.dataset, text=self.text)
        return same_text.all()

    @property
    def all_messages(self):
        """Queryset of the dataset's messages filtered by the related words' ids.

        The filter expression is built by ``utils.levels_or`` over
        ``tweet_words__id``; presumably it ORs one condition per id —
        verify against ``utils``.
        """
        dataset_messages = self.dataset.message_set.all()
        related_ids = [word.id for word in self.related_words]
        id_condition = utils.levels_or("tweet_words__id", related_ids)
        return dataset_messages.filter(id_condition)
class Topic(models.Model):
    """A topic belonging to a :class:`TopicModel`, linked to messages and words."""

    # Parent topic model (reverse accessor: ``topics``).
    model = models.ForeignKey(TopicModel, related_name='topics')
    # Short name, up to 100 characters.
    name = base_models.Utf8CharField(max_length=100)
    # Longer description, up to 200 characters.
    description = base_models.Utf8CharField(max_length=200)
    # Integer index; presumably the topic's position within its model — TODO confirm.
    index = models.IntegerField()
    # Float parameter; presumably the topic's prior weight — TODO confirm.
    alpha = models.FloatField()

    # Messages associated with this topic, stored through ``MessageTopic``.
    messages = models.ManyToManyField(
        Message,
        through='MessageTopic',
        related_name='topics',
    )
    # Words associated with this topic, stored through ``TopicWord``.
    words = models.ManyToManyField(
        Word,
        through='TopicWord',
        related_name='topics',
    )
class Word(models.Model):
    """A word entry of a :class:`Dictionary`, linked to the messages it relates to."""

    # Owning dictionary (reverse accessor: ``words``).
    dictionary = models.ForeignKey(Dictionary, related_name='words')
    # Integer index; presumably the word's id within the dictionary — TODO confirm.
    index = models.IntegerField()
    # The word text, up to 100 characters.
    text = base_models.Utf8CharField(max_length=100)
    # Integer count; presumably the number of documents containing the word — TODO confirm.
    document_frequency = models.IntegerField()
    # Messages linked to this word, stored through ``MessageWord``.
    messages = models.ManyToManyField(
        Message,
        through='MessageWord',
        related_name='words',
    )

    def __repr__(self):
        """Represent the word by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Same value as :meth:`__repr__`."""
        return self.__repr__()
class Feature(models.Model):
    """A feature entry of a :class:`Dictionary`, either system- or user-provided."""

    dictionary = models.ForeignKey(Dictionary, related_name='features')
    index = models.IntegerField()
    text = base_models.Utf8CharField(max_length=150)
    document_frequency = models.IntegerField()
    messages = models.ManyToManyField(
        Message,
        through='MessageFeature',
        related_name='features',
    )

    source = models.ForeignKey(
        User,
        related_name="features",
        default=None,
        null=True,
    )
    """The user who added this feature; None means it is a system feature"""

    origin = models.ForeignKey(
        Message,
        related_name="user_features",
        default=None,
        null=True,
    )
    """The message from which this feature was added to the list"""

    # NOTE(review): recent Django releases report combining auto_now_add /
    # auto_now / default on one field as a system-check error — confirm
    # before upgrading Django.
    created_at = models.DateTimeField(auto_now_add=True, default=None)
    """The time this feature was created"""

    last_updated = models.DateTimeField(
        auto_now_add=True,
        auto_now=True,
        default=None,
    )
    """The time this feature was last updated"""

    valid = models.BooleanField(default=True)
    """Whether this feature is valid (False indicates the feature has been removed)"""

    def __repr__(self):
        """Represent the feature by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Same value as :meth:`__repr__`."""
        return self.__repr__()

    def get_origin_message_code(self):
        """Return the code assigned to the origin message by this feature's source.

        Looks up the first valid, user-labeled code assignment on ``origin``
        whose source equals ``self.source``. Returns ``None`` when there is
        no origin message or no such assignment.
        """
        if not self.origin:
            return None

        matching = self.origin.code_assignments.filter(
            source=self.source,
            is_user_labeled=True,
            valid=True,
        )
        first_assignment = matching.first()
        return first_assignment.code if first_assignment else None
class PrecalcCategoricalDistribution(models.Model):
    """A stored count for one (dimension_key, level) pair, optionally per dataset."""

    # Owning dataset; optional (reverse accessor: ``distributions``).
    dataset = models.ForeignKey(
        Dataset,
        related_name="distributions",
        null=True,
        blank=True,
        default=None,
    )
    # Key identifying the dimension this row belongs to.
    dimension_key = models.CharField(
        db_index=True,
        max_length=64,
        blank=True,
        default="",
    )
    # The level (value) within the dimension that was counted.
    level = base_models.Utf8CharField(
        db_index=True,
        max_length=128,
        blank=True,
        default="",
    )
    # The stored count for this dimension/level.
    count = models.IntegerField()

    class Meta:
        # Composite index over (dimension_key, level), in addition to the
        # per-column indexes declared on the fields.
        index_together = [
            ["dimension_key", "level"],
        ]
class Code(models.Model):
    """A code of a message"""

    text = base_models.Utf8CharField(max_length=200)
    """The text of the code"""

    def __repr__(self):
        """Represent the code by its ``text`` value."""
        return self.text

    def __unicode__(self):
        """Same value as :meth:`__repr__`."""
        return self.__repr__()

    def get_definition(self, source):
        """Return the most recently updated valid definition of this code by ``source``.

        :param source: the definition author to filter on (matched against
            the ``source`` field of the related ``definitions``).
        :return: a dict with keys ``code_id``, ``code_text``, ``source``,
            ``text`` and ``examples``, or ``None`` when ``source`` has no
            valid definition for this code.
        """
        # Only valid definitions are ever returned, so the lookup itself must
        # decide whether there is a result. Checking existence without the
        # ``valid`` filter (as was done before) crashed on ``None`` when the
        # source only had invalidated definitions.
        definition = self.definitions.filter(
            source=source,
            valid=True,
        ).order_by("-last_updated").first()

        if definition is None:
            return None

        return {
            "code_id": self.id,
            "code_text": self.text,
            "source": source,
            "text": definition.text,
            "examples": definition.examples,
        }
class Person(models.Model):
    """
    A person who sends messages in a dataset.
    """

    class Meta:
        # Composite index used by the importer. This is a single field group
        # written as a flat tuple, which Django accepts for index_together.
        index_together = ('dataset', 'original_id')

    dataset = models.ForeignKey(Dataset)
    """Which :class:`Dataset` this person belongs to"""

    original_id = models.BigIntegerField(null=True, blank=True, default=None)
    """An external id for the person, e.g. a user id from Twitter"""

    username = base_models.Utf8CharField(max_length=150, null=True, blank=True, default=None)
    """Username is a short system-y name."""

    full_name = base_models.Utf8CharField(max_length=250, null=True, blank=True, default=None)
    """Full name is a longer user-friendly name"""

    language = models.ForeignKey(Language, null=True, blank=True, default=None)
    """The person's primary :class:`Language`"""

    message_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of messages the person produced"""

    replied_to_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person's messages were replied to"""

    shared_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person's messages were shared or retweeted"""

    mentioned_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of times the person was mentioned in other people's messages"""

    friend_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of people this user has connected to"""

    follower_count = models.PositiveIntegerField(blank=True, default=0)
    """The number of people who have connected to this person"""

    profile_image_url = models.TextField(null=True, blank=True, default="")
    """The person's profile image url"""

    def __unicode__(self):
        """Return the username, or an empty string when it is not set.

        ``username`` is nullable, and returning ``None`` from
        ``__unicode__`` raises ``TypeError`` when the object is rendered.
        """
        return self.username or ""

    @property
    def profile_image_processed_url(self):
        """Return the profile image URL, rewritten for prefetched images.

        When the dataset reports ``has_prefetched_images`` and the stored URL
        ends in ``/<name>.<suffix>``, the result is
        ``"profile_<original_id>.<suffix>"``. Otherwise the stored value is
        returned unchanged (including ``None`` or ``""``).
        """
        url = self.profile_image_url
        # profile_image_url is nullable: a truthiness check covers both ""
        # and None, so None never reaches the regex search below.
        if url and self.dataset.has_prefetched_images:
            # Raw string keeps the same pattern without invalid string escapes.
            match = re.search(r'/[_\.\-\w\d]+\.([\w]+)$', url)
            if match:
                suffix = match.group(1)
                url = "profile_" + str(self.original_id) + "." + suffix
        return url
class Hashtag(models.Model):
    """A hashtag that appears in a message."""

    text = base_models.Utf8CharField(
        max_length=100,
        db_index=True,
    )
    """The hashtag text without the leading hash character (indexed)"""