class UserRule(db.Model):
    """Association row linking one user to one rule, with per-user settings.

    Each row stores whether the rule is enabled for the user, an optional
    array of integer levels, and creation / last-modification timestamps.
    """

    __tablename__ = "users_rules"
    # A bare ``db.UniqueConstraint(...)`` expression in the class body is
    # evaluated and thrown away; it must be listed in ``__table_args__`` to be
    # attached to the table.
    __table_args__ = (db.UniqueConstraint('user_id', 'rule_id'),)

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
    rule_id = db.Column(db.Integer, db.ForeignKey('rules.id'), nullable=False)
    enabled = db.Column(db.Boolean, nullable=False)
    levels = db.Column(ARRAY(db.Integer))
    created_at = db.Column(db.DateTime, nullable=False)
    last_modified = db.Column(db.DateTime, nullable=False)

    rule = db.relationship("Rule", back_populates="user_associations")
    user = db.relationship("User", back_populates="rule_associations")

    def __init__(self, user_id, rule_id, enabled=False, levels=None):
        """Create an association between ``user_id`` and ``rule_id``.

        Args:
            user_id: Primary key of the user.
            rule_id: Primary key of the rule.
            enabled: Whether the rule is switched on for this user.
            levels: Optional list of integers stored in ``levels``.
        """
        self.user_id = user_id
        self.rule_id = rule_id
        self.levels = levels
        self.enabled = enabled
        # Use a single timestamp so a fresh row has created_at == last_modified
        # instead of two values a few microseconds apart.
        now = datetime.datetime.now()
        self.created_at = now
        self.last_modified = now
class Post(db.Model):
    """A post retrieved from an external source, plus its analysis results.

    ``content`` holds the raw payload as JSON. The constructor parses the
    payload as a Twitter status when ``source == 'twitter'`` and as a
    Facebook post for every other source value.
    """

    __tablename__ = "posts"
    # The constraint has to live in ``__table_args__`` to be attached to the
    # table; as a bare statement it was discarded. It previously named a
    # non-existent ``source_id`` column; ``original_id`` is the column that
    # stores the identifier from the source.
    # NOTE(review): confirm ``original_id`` is the intended column.
    __table_args__ = (
        db.UniqueConstraint('original_id', 'source', name='post_id'),
    )

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    original_id = db.Column(db.String(40), nullable=False)
    content = db.Column(db.JSON, nullable=False)
    source = db.Column(db.String(255), nullable=False)
    retrieved_at = db.Column(db.DateTime, nullable=False)
    created_at = db.Column(db.DateTime)

    # filters
    is_news = db.Column(db.Boolean)
    toxicity = db.Column(db.Float)
    gender = db.Column(db.Enum(GenderEnum))
    is_corporate = db.Column(db.Boolean)
    virality_count = db.Column(db.Integer)
    has_link = db.Column(db.Boolean)
    news_score = db.Column(db.Float)
    political_quintile = db.Column(db.Enum(PoliticsEnum))

    def __init__(self, original_id, source, content, is_news):
        """Build a post from a raw payload.

        Args:
            original_id: Identifier of the post at its source.
            source: Source name; ``'twitter'`` selects Twitter parsing, any
                other value is parsed as a Facebook payload.
            content: Raw payload dict stored in the ``content`` JSON column.
            is_news: Initial value of the ``is_news`` flag.

        Raises:
            KeyError: If the payload lacks the keys read for its source.
            ValueError: If the creation timestamp does not match the
                expected format.
        """
        self.original_id = original_id
        self.source = source
        self.content = content
        self.is_news = is_news
        self.retrieved_at = datetime.datetime.now()
        if source == 'twitter':
            self.created_at = datetime.datetime.strptime(
                content['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
            # 'possibly_sensitive' in content
            self.has_link = bool(content['entities']['urls'])
        else:
            self.created_at = datetime.datetime.strptime(
                content['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
            self.has_link = content['type'] == 'link'

    def as_dict(self):
        """Return the column values as a dict keyed by column name.

        ``gender`` is rendered with ``str()`` (so an unset gender becomes the
        string ``'None'``); ``political_quintile`` is rendered as the enum's
        ``value`` or ``None`` when unset.
        """
        enum_columns = ('gender', 'political_quintile')
        d = {
            c.name: getattr(self, c.name)
            for c in self.__table__.columns
            if c.name not in enum_columns
        }
        d['gender'] = str(self.gender)
        d['political_quintile'] = (
            self.political_quintile.value if self.political_quintile else None
        )
        return d

    def get_text(self):
        """Return the text of the post, or ``""`` when none is available."""
        # TODO: logic for getting text - should we get text from link shared, etc?
        content = self.content
        if self.source == "twitter":
            # Prefer the extended text; fall back to 'text' otherwise.
            if 'full_text' in content:
                return content['full_text']
            return content['text']
        if self.source == "facebook":
            return content.get('message', "")
        return ""

    def has_toxicity_rate(self):
        """Return True when a toxicity score has been stored."""
        return self.toxicity is not None

    def update_content(self, content, is_news=False):
        """Merge ``content`` into the stored payload.

        Args:
            content: Dict whose keys overwrite / extend the stored payload.
            is_news: When true, set ``is_news`` to True (never resets it).
        """
        # A plain JSON column does not track in-place mutation, so build a new
        # dict and reassign it (as update_replies_count does) to make the
        # change visible to the session.
        merged = self.content.copy()
        merged.update(content)
        self.content = merged
        if is_news:
            self.is_news = True

    def update_toxicity(self, score):
        """Store the toxicity score and commit the session."""
        self.toxicity = score
        db.session.commit()

    def update_gender_corporate(self, gender, corporate):
        """Store gender and corporate flag, then commit the session."""
        self.gender = gender
        self.is_corporate = corporate
        db.session.commit()

    def has_gender_corporate(self):
        """Return True when both gender and corporate flag are set."""
        return (self.gender is not None) and (self.is_corporate is not None)

    def update_replies_count(self, count):
        """Record ``count`` as replies count unless a larger one is stored."""
        new_content = self.content.copy()
        prev_count = self.content.get('replies_count', 0)
        new_content['replies_count'] = max(count, prev_count)
        # Reassign a fresh dict so the JSON column change is detected.
        self.content = new_content

    def has_virality(self):
        """Return True when a virality count has been stored."""
        return self.virality_count is not None

    def has_news_score(self):
        """Return True when a news score has been stored."""
        return self.news_score is not None

    def has_already_been_analyzed(self):
        """Return True when every analysis result is present."""
        return (
            self.has_virality()
            and self.has_news_score()
            and self.has_gender_corporate()
            and self.has_toxicity_rate()
        )

    def get_author_name(self):
        """Return the author's display name from the payload.

        For non-Facebook posts that carry a ``retweeted_status``, the name of
        the retweeted status's user is returned.
        """
        if self.source == 'facebook':
            return self.content['from']['name']
        if 'retweeted_status' in self.content:
            return self.content['retweeted_status']['user']['name']
        return self.content['user']['name']
class Post(db.Model):
    """A post retrieved from an external source, plus its analysis results.

    The mapper is polymorphic on ``source``. Several accessors here raise
    ``NotImplementedError``; presumably per-source subclasses override them
    (those subclasses are not visible here).
    """

    __tablename__ = "posts"
    # The constraint has to live in ``__table_args__`` to be attached to the
    # table; as a bare statement it was discarded. It previously named a
    # non-existent ``source_id`` column; ``original_id`` is the column that
    # stores the identifier from the source.
    # NOTE(review): confirm ``original_id`` is the intended column.
    __table_args__ = (
        db.UniqueConstraint('original_id', 'source', name='post_id'),
    )

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    original_id = db.Column(db.String(40), nullable=False)
    content = db.Column(db.JSON, nullable=False)
    source = db.Column(db.String(255), nullable=False)
    retrieved_at = db.Column(db.DateTime, nullable=False)
    created_at = db.Column(db.DateTime)

    # filters
    toxicity = db.Column(db.Float)
    gender = db.Column(db.Enum(GenderEnum))
    is_corporate = db.Column(db.Boolean)
    virality_count = db.Column(db.Integer)
    has_link = db.Column(db.Boolean)
    news_score = db.Column(db.Float)

    rule_associations = db.relationship(
        "PostAdditiveRule",
        back_populates="post",
        cascade="delete, delete-orphan")
    # Class-level default for the per-instance rule cache (see cache_rule).
    rules = None

    __mapper_args__ = {
        'polymorphic_on': source,
    }

    def __init__(self, original_id, source, content):
        """Build a post from a raw payload.

        ``created_at`` and ``has_link`` are derived from the payload for the
        sources ``'twitter'``, ``'facebook'`` and ``'mastodon'``; for any
        other source they are left unset.

        Args:
            original_id: Identifier of the post at its source.
            source: Source name.
            content: Raw payload dict stored in the ``content`` JSON column.

        Raises:
            KeyError: If the payload lacks the keys read for its source.
            ValueError: If a Twitter/Facebook creation timestamp does not
                match the expected format.
        """
        self.original_id = original_id
        self.source = source
        self.content = content
        self.retrieved_at = datetime.datetime.now()
        self.rules = None

        if source == 'twitter':
            self.created_at = datetime.datetime.strptime(
                content['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
            # 'possibly_sensitive' in content
            self.has_link = bool(content['entities']['urls'])
        elif source == 'facebook':
            self.created_at = datetime.datetime.strptime(
                content['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
            self.has_link = content['type'] == 'link'
        elif source == 'mastodon':
            # The payload's created_at is stored as-is, without parsing.
            self.created_at = content['created_at']
            card = content['card']
            if card and card['type']:
                self.has_link = card['type'].lower() == 'link'
            else:
                self.has_link = False

    def as_dict(self):
        """Return the column values as a dict keyed by column name.

        ``gender`` is rendered with ``str()`` (so an unset gender becomes the
        string ``'None'``). A ``'rules'`` entry is added only when rules have
        been cached on this instance.
        """
        d = {
            c.name: getattr(self, c.name)
            for c in self.__table__.columns
            if c.name != 'gender'
        }
        d['gender'] = str(self.gender)
        # using the cache saves time over db lookups
        if self.rules:
            d['rules'] = self.rules
        return d

    def cache_rule(self, rule):
        """Append ``rule`` to this post's cached rule list.

        The cached list is emitted by ``as_dict`` under the ``'rules'`` key.
        """
        if self.rules is None:
            self.rules = []
        self.rules.append(rule)

    def get_text(self):
        """Return the text of the post; not implemented in this class."""
        raise NotImplementedError

    def has_toxicity_rate(self):
        """Return True when a toxicity score has been stored."""
        return self.toxicity is not None

    def update_content(self, content):
        """Merge ``content`` into the stored payload.

        Args:
            content: Dict whose keys overwrite / extend the stored payload.
        """
        # A plain JSON column does not track in-place mutation, so build a new
        # dict and reassign it to make the change visible to the session.
        merged = self.content.copy()
        merged.update(content)
        self.content = merged

    def update_toxicity(self, score):
        """Store the toxicity score and commit the session."""
        self.toxicity = score
        db.session.commit()

    def update_gender_corporate(self, gender, corporate):
        """Store gender and corporate flag, then commit the session."""
        self.gender = gender
        self.is_corporate = corporate
        db.session.commit()

    def has_gender_corporate(self):
        """Return True when both gender and corporate flag are set."""
        return (self.gender is not None) and (self.is_corporate is not None)

    def get_precomputed_gender(self):
        """Return None in this class."""
        return None

    def has_precomputed_corporate(self):
        """Return None in this class."""
        return None

    def has_virality(self):
        """Return True when a virality count has been stored."""
        return self.virality_count is not None

    def has_news_score(self):
        """Return True when a news score has been stored."""
        return self.news_score is not None

    def has_already_been_analyzed(self):
        """Return True when every analysis result is present."""
        return (
            self.has_virality()
            and self.has_news_score()
            and self.has_gender_corporate()
            and self.has_toxicity_rate()
        )

    def get_urls(self):
        """Return the post's URLs; not implemented in this class."""
        raise NotImplementedError

    def get_author_name(self):
        """Return the author's name; not implemented in this class."""
        raise NotImplementedError

    def get_likes_count(self):
        """Return the likes count; not implemented in this class."""
        raise NotImplementedError

    def get_comments_count(self):
        """Return the comments count; not implemented in this class."""
        raise NotImplementedError

    def get_shares_count(self):
        """Return the shares count; not implemented in this class."""
        raise NotImplementedError