Esempio n. 1
0
class TagArticle(JSONModel):
    """Per-tag index of articles, kept in one Redis sorted set per tag.

    The sorted set ``TagArticle:<tag>`` holds article ids scored by ``time``.
    """
    KEY = 'TagArticle:%s'

    tag = StringProperty()
    article_id = IntegerProperty()
    time = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        """Return the sorted-set key that belongs to this entry's tag."""
        return [self.KEY % self.tag]

    def _save_self(self, redis_client, inserting=False):
        """Add the article to the tag's set, or remove it when ``time`` is falsy."""
        key = self.KEY % self.tag
        if self.time:
            redis_client.zadd(key, self.time, self.article_id)
        else:
            redis_client.zrem(key, self.article_id)

    @classmethod
    def get_article_ids(cls, tag_name, cursor=None, limit=CONFIG.ARTICLES_PER_PAGE):
        """Return up to ``limit`` ``(article_id, time)`` pairs, highest score first.

        Without a cursor the first ``limit`` entries are returned; with a
        cursor only entries whose score is strictly below it are returned.
        """
        redis_client = cls.redis_client
        key = cls.KEY % tag_name
        if cursor is None:
            return redis_client.zrevrange(key, 0, limit - 1, withscores=True, score_cast_func=int)
        else:
            # '(' makes the upper bound exclusive, so the entry the cursor
            # was taken from is not returned again.
            return redis_client.zrevrangebyscore(key, '(%d' % cursor, 0, 0, limit, withscores=True, score_cast_func=int)

    @classmethod
    def get_articles(cls, category_name, cursor=None, limit=CONFIG.ARTICLES_PER_PAGE):
        """Return ``(articles, next_cursor)`` for one page of a tag.

        ``category_name`` is used as the tag name (the parameter name is kept
        for backward compatibility). Returns ``([], None)`` when the tag has
        no matching entries.
        """
        # Fetch exactly ``limit`` ids: the next-cursor computation compares
        # the number of ids against the same ``limit``.
        article_ids_with_time = cls.get_article_ids(category_name, cursor, limit)
        if article_ids_with_time:
            from .article import Article
            return Article.get_articles_and_next_cursor(article_ids_with_time, limit=limit)
        return [], None
Esempio n. 2
0
class Model4(Model1):
    """Model1 subclass declaring six properties, one per property kind used here."""
    a = StringProperty()
    b = IntegerProperty()
    c = BooleanProperty()
    d = FloatProperty()
    e = DateTimeProperty()
    # Same property type as ``e`` but created with auto_now=True.
    f = DateTimeProperty(auto_now=True)
Esempio n. 3
0
class CategoryArticle(JSONModel):
    """Per-category index of articles, kept in one Redis sorted set per category.

    The sorted set ``CategoryArticle:<category>`` holds article ids scored by
    ``time``.
    """
    KEY = 'CategoryArticle:%s'

    category = StringProperty()
    article_id = IntegerProperty()
    time = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        """Return a one-element list with this entry's category key."""
        category_key = self.KEY % self.category
        return [category_key]

    def _save_self(self, redis_client, inserting=False):
        """Remove the article when ``time`` is falsy, otherwise add it scored by ``time``."""
        category_key = self.KEY % self.category
        if not self.time:
            redis_client.zrem(category_key, self.article_id)
            return
        redis_client.zadd(category_key, self.time, self.article_id)
Esempio n. 4
0
class JSONTestModel(JSONModel):
    """JSONModel subclass whose save hooks each leave a marker field in a hash."""
    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
    e = Property()

    def _save_self(self, redis_client, inserting=False):
        """Set field ``'self'`` to ``'1'`` in the hash stored at ``self.KEY``."""
        redis_client.hset(self.KEY, 'self', '1')

    def _save_relative(self, redis_client, inserting=False):
        """Set field ``'relative'`` to ``'1'`` in the hash stored at ``self.KEY``."""
        redis_client.hset(self.KEY, 'relative', '1')
Esempio n. 5
0
class ArticleComments(JSONModel):
    """Comment ids of an article, kept in the Redis list ``ArticleComments:<id>``."""
    KEY = 'ArticleComments:%d'

    article_id = IntegerProperty()
    comment_ids = ListProperty(int)

    def _get_watching_keys(self, inserting=False):
        """Return a one-element list with this article's list key."""
        list_key = self.KEY % self.article_id
        return [list_key]

    def _save_self(self, redis_client, inserting=False):
        """Replace the stored list with ``comment_ids``, pushed one at a time."""
        list_key = self.KEY % self.article_id
        redis_client.delete(list_key)
        for one_id in self.comment_ids:
            redis_client.rpush(list_key, one_id)

    @classmethod
    def get_by_article_id(cls,
                          article_id,
                          order,
                          page,
                          page_size=CONFIG.COMMENTS_PER_PAGE):
        """Return one page of comment ids of an article.

        A truthy ``order`` pages from the head of the list in stored order.
        A falsy ``order`` pages from the tail and returns the page reversed.
        Pages below 1 are treated as page 1.
        """
        list_key = cls.KEY % article_id
        if page < 1:
            page = 1
        offset = (page - 1) * page_size
        if order:
            first, last = offset, offset + page_size - 1
        else:
            # Negative indexes count from the end of the list.
            last = -offset - 1
            first = last - page_size + 1
        page_ids = cls.redis_client.lrange(list_key, first, last)
        if not order and page_ids:
            page_ids.reverse()
        return page_ids

    @classmethod
    def append_comment_to_article(cls, redis_client, comment_id, article_id):
        """Push ``comment_id`` onto the tail of the article's list."""
        list_key = cls.KEY % article_id
        redis_client.rpush(list_key, comment_id)

    @classmethod
    def get_comment_count_of_article(cls, article_id):
        """Return the length of the article's comment list."""
        list_key = cls.KEY % article_id
        return cls.redis_client.llen(list_key)

    @classmethod
    def get_comment_count_of_articles(cls, article_ids):
        """Return ``{article_id: comment_count}`` using one non-transactional pipeline."""
        with cls.redis_client.pipeline(transaction=False) as pipe:
            for one_article_id in article_ids:
                pipe.llen(cls.KEY % one_article_id)
            raw_counts = pipe.execute()

        numbers = [int(raw) for raw in raw_counts]
        return dict(zip(article_ids, numbers))
Esempio n. 6
0
class KeywordArticle(JSONModel):
    """Keyword search index: a Redis set of ``'<keywords>:<article_id>'`` members."""
    keywords = StringProperty()
    article_id = IntegerProperty()

    def _get_watching_keys(self, inserting=False):
        """Return a one-element list with the set key."""
        return [self.KEY]

    def _save_self(self, redis_client, inserting=False):
        """Add this entry's ``'<keywords>:<article_id>'`` member to the set."""
        entry = '%s:%d' % (self.keywords, self.article_id)
        redis_client.sadd(self.KEY, entry)

    def delete(self, redis_client):
        """Remove this entry's ``'<keywords>:<article_id>'`` member from the set."""
        entry = '%s:%d' % (self.keywords, self.article_id)
        redis_client.srem(self.KEY, entry)

    @classmethod
    def query_by_keyword(cls,
                         keyword,
                         result_limit=CONFIG.SEARCH_PAGE_SIZE,
                         search_limit=CONFIG.MAX_SEARCH_COUNT):
        """Return ids of articles whose keywords contain ``keyword``.

        A cached comma-separated id list is used when present. An unparsable
        cache entry is logged, deleted and recomputed. On a miss a single
        SSCAN call is issued and its first ``result_limit`` matches are
        cached and returned.
        """
        cache_key = 'KeywordArticles:' + keyword
        cached = redis_cache_client.get(cache_key)
        if cached is not None:
            if not cached:
                # An empty string is the cached form of "no results".
                return []
            try:
                return [int(piece) for piece in cached.split(',')]
            except ValueError:
                logging.warning('Key "%s" contains wrong value: %s', cache_key,
                                cached)
                redis_cache_client.delete(cache_key)

        glob = '*%s*:*' % keyword.lower()
        _, members = cls.redis_client.sscan(cls.KEY,
                                            match=glob,
                                            count=search_limit)
        id_strings = []
        if members:
            # The article id is whatever follows the last ':' of a member.
            id_strings = [
                member.rsplit(':', 1)[-1] for member in members[:result_limit]
            ]
        found = [int(id_string) for id_string in id_strings]

        redis_cache_client.set(cache_key,
                               ','.join(id_strings),
                               ex=CONFIG.DEFAULT_CACHE_TIME)

        return found
Esempio n. 7
0
class UserEmail(JSONModel):
    """Redis hash at ``KEY`` mapping an email address to a user id."""
    email = StringProperty()
    id = IntegerProperty()

    @classmethod
    def get_user_id_by_email(cls, email):
        """Return the id stored for ``email`` as an int, or None when absent/falsy."""
        stored_id = cls.redis_client.hget(cls.KEY, email)
        return int(stored_id) if stored_id else None

    @classmethod
    def get_user_ids_by_emails(cls, emails):
        """Return the raw HMGET result for ``emails`` (values are not converted)."""
        client = cls.redis_client
        return client.hmget(cls.KEY, emails)

    def _save_self(self, redis_client, inserting=False):
        """Delete the email's field when ``id`` is falsy, otherwise store the id."""
        if not self.id:
            redis_client.hdel(self.KEY, self.email)
            return
        redis_client.hset(self.KEY, self.email, self.id)
Esempio n. 8
0
class ArticleURL(JSONModel):
    """Redis hash at ``KEY`` mapping an article URL to its article id."""
    url = StringProperty()
    article_id = IntegerProperty()

    @classmethod
    def get_article_id_by_url(cls, url):
        """Return the article id stored for ``url`` as an int, or None."""
        article_id = cls.redis_client.hget(cls.KEY, url)
        if article_id:
            return int(article_id)

    @classmethod
    def get_by_url(cls, url):
        """Return an ``ArticleURL`` for ``url``, or None when the URL is unknown."""
        article_id = cls.get_article_id_by_url(url)
        if article_id:
            # The property declared on this class is ``article_id``; the
            # previous ``id=`` keyword did not match any declared property.
            return cls(url=url, article_id=article_id)

    @classmethod
    def search_by_date(cls, date, limit=CONFIG.SEARCH_PAGE_SIZE):
        """Return the result of one HSCAN call matching fields that start with ``date``.

        The scan starts at cursor 0 with ``limit`` as the COUNT argument; the
        returned cursor is discarded.
        """
        cursor, result = cls.redis_client.hscan(cls.KEY, 0, date + '*', limit)
        return result

    def _save_self(self, redis_client, inserting=False):
        """Store ``url -> article_id``, or delete the URL when ``article_id`` is falsy.

        Nothing is written when ``url`` is falsy.
        """
        if self.url:
            if self.article_id:
                redis_client.hset(self.KEY, self.url, self.article_id)
            else:
                redis_client.hdel(self.KEY, self.url)

    def _check_inserting(self):
        """Raise ``IntegrityError`` when the URL already belongs to another article."""
        article_id = self.get_article_id_by_url(self.url)
        if article_id:
            article_id = int(article_id)
            if article_id != self.article_id:
                raise IntegrityError(
                    'article url "%s" has been used by article %d' %
                    (self.url, article_id))
Esempio n. 9
0
class IDModel(JSONModel):
    """Model whose entities are JSON strings in one Redis list, addressed by id.

    The entity with id ``n`` is stored at list index ``n - 1`` of ``KEY``.
    """
    id = IntegerProperty()

    @classmethod
    def count(cls):
        """Return the length of the list stored at ``KEY``."""
        return cls.redis_client.llen(cls.KEY)

    @classmethod
    def get_by_id(cls, entity_id):
        """Return the entity with the given id, or None.

        None is returned for non-positive ids, for ids that ``int()`` turns
        into a Python 2 ``long``, and when the list has no content at that
        position.
        """
        entity_id = int(entity_id)
        if entity_id <= 0 or isinstance(entity_id, long):
            return
        json_content = cls.redis_client.lindex(cls.KEY, entity_id - 1)
        if json_content:
            return cls.from_json(json_content)

    @classmethod
    def get_by_ids(cls, ids, filter_empty=False):
        """Return the entities for ``ids``.

        With ``filter_empty`` missing contents and falsy entities are
        dropped; otherwise every fetched content is passed to ``from_json``
        and the result keeps one slot per id.
        """
        if not ids:
            return []

        results = cls._get_data_by_ids(ids)

        if filter_empty:
            entities = [cls.from_json(json_content)
                        for json_content in results
                        if json_content]
            return [entity for entity in entities if entity]
        else:
            return [cls.from_json(json_content)
                    for json_content in results]

    @classmethod
    def _get_data_by_ids(cls, ids):
        """Return the raw JSON content for each id, in the order of ``ids``.

        Ids below 1 yield None instead of being looked up: they would map to
        a negative list index, which Redis resolves from the tail of the
        list and would therefore return an unrelated entity.
        """
        key = cls.KEY
        indexes = [int(entity_id) - 1 for entity_id in ids]
        pipe = cls.redis_client.pipeline(transaction=False)
        queued = False
        for index in indexes:
            if index >= 0:
                pipe.lindex(key, index)
                queued = True
        # Replies arrive in queueing order; re-align them with ``ids`` and
        # fill the skipped positions with None.
        fetched = iter(pipe.execute() if queued else [])
        return [next(fetched) if index >= 0 else None for index in indexes]

    def save(self, redis_client=None, inserting=False, relative=True, transactional=True):
        """Delegate to the parent ``save`` with the same arguments; returns None."""
        super(IDModel, self).save(redis_client, inserting, relative, transactional)

    def _check_inserting(self):
        """Raise ``PropertyError`` when an object to insert already has an id."""
        if self.id is not None:
            raise PropertyError('cannot insert a %s object with id' % self.__class__.__name__)

    def _populate_required_attributes(self, pipeline):
        """Assign the next id (current list length + 1) when no id is set."""
        if self.id is None:
            self.id = pipeline.llen(self.KEY) + 1

    def _save_self(self, pipeline, inserting=False):
        """Append the JSON on insert, otherwise overwrite the slot of ``self.id``.

        Raises ``PropertyError`` when updating an object without an id.
        """
        if inserting:
            pipeline.rpush(self.KEY, self.to_json())
        else:
            if self.id is None:
                raise PropertyError('cannot save a %s object without id' % self.__class__.__name__)
            pipeline.lset(self.KEY, self.id - 1, self.to_json())

    def _fail_on_save(self, exception, pipeline, inserting=False):
        """Reset the id of an object whose insert failed."""
        if inserting:
            self.id = None
Esempio n. 10
0
class PropertiedTestModel2(PropertiedModel):
    """PropertiedModel subclass with a single integer property ``a``."""
    a = IntegerProperty()
Esempio n. 11
0
class PublicTestModel(PublicModel):
    """PublicModel subclass with a single integer property ``a``."""
    a = IntegerProperty()
Esempio n. 12
0
class IDTestModel(IDModel):
    """IDModel subclass declaring integer, string, boolean and list properties."""
    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
Esempio n. 13
0
class HashTestModel(HashModel):
    """HashModel subclass declaring integer, string, boolean and list properties."""
    a = IntegerProperty()
    b = StringProperty()
    c = BooleanProperty()
    d = ListProperty()
Esempio n. 14
0
class Comment(PublicModel):
    """Comment on an article, with helpers for rendering and listing comments."""
    article_id = IntegerProperty()
    user_id = IntegerProperty()
    content = StringProperty()
    format = IntegerProperty()
    ua = ListProperty()
    time = DateTimeProperty(auto_now=True)

    HTML_PATTERN = re.compile('<.*?>|\&.*?\;', re.UNICODE)
    ROOT_LINK_PATTERN = re.compile(r'<a href="/([^"]*)">')
    ANCHOR_LINK_PATTERN = re.compile(r'<a href="#([^"]*)">')
    REPLY_LINK_PATTERN = re.compile(r'<a href="[^"]*#comment-id-(\d+)">')

    def html_content(self):
        """Return the content rendered by ``format_content`` for this comment's format."""
        return format_content(self.content, self.format)

    def html_content_with_full_url(self, article_url):  # for email and ATOM
        """Return the rendered content with root and anchor links made absolute.

        Links starting with ``/`` are prefixed with ``CONFIG.MAJOR_HOST_URL``;
        links starting with ``#`` are prefixed with ``article_url``.
        """
        html = self.html_content()
        root_replacement = r'<a href="%s/\1">' % CONFIG.MAJOR_HOST_URL
        html = self.ROOT_LINK_PATTERN.sub(root_replacement, html)
        anchor_replacement = r'<a href="%s#\1">' % article_url
        return self.ANCHOR_LINK_PATTERN.sub(anchor_replacement, html)

    def striped_html_content(self, length=CONFIG.LATEST_COMMENTS_LENGTH):
        """Return the first ``length`` characters of the content without markup.

        Tags and entities are each replaced by a space before truncating, and
        the truncated text is stripped of surrounding whitespace.
        """
        plain_text = self.HTML_PATTERN.sub(' ', self.html_content())
        truncated = plain_text[:length]
        return truncated.strip()

    @classmethod
    def get_comments_of_article(cls,
                                article_id,
                                order,
                                page,
                                page_size=CONFIG.COMMENTS_PER_PAGE,
                                public_only=True):
        """Return ``(comments, has_next_page)`` for one page of an article's comments.

        ``has_next_page`` is True when the page of ids is completely full.
        """
        page_ids = ArticleComments.get_by_article_id(
            article_id, order, page, page_size)
        if not page_ids:
            return [], False
        has_next_page = len(page_ids) == page_size
        page_comments = Comment.get_by_ids(page_ids,
                                           filter_empty=True,
                                           public_only=public_only)
        return page_comments, has_next_page

    @classmethod
    def get_latest_comments(cls, limit=CONFIG.LATEST_COMMENTS_FOR_SIDEBAR):
        """Return ``(comments, article_dict, user_dict)`` for the newest comments.

        The last ``limit`` stored comments are read newest first and only
        public ones are kept. ``([], {}, {})`` is returned when there are no
        such comments, no matching public articles, or no matching users.
        """
        nothing = [], {}, {}

        stored = cls.redis_client.lrange(cls.KEY, -limit, -1)
        if not stored:
            return nothing

        latest = []
        for raw_comment in reversed(stored):
            parsed = Comment.from_json(raw_comment)
            if parsed.public:
                latest.append(parsed)
        if not latest:
            return nothing

        wanted_article_ids = set(item.article_id for item in latest)
        wanted_user_ids = set(item.user_id for item in latest)

        articles = Article.get_by_ids(wanted_article_ids, public_only=True)
        if not articles:
            return nothing
        article_dict = dict((article.id, article) for article in articles)

        users = User.get_by_ids(wanted_user_ids, filter_empty=True)
        if not users:
            return nothing
        user_dict = dict((user.id, user) for user in users)
        return latest, article_dict, user_dict

    def _get_relative_keys(self, inserting=False):
        """Return the article's comment-list key when inserting, else an empty list."""
        if not inserting:
            return []
        return [ArticleComments.KEY % self.article_id]

    def _save_relative(self, redis_client, inserting=False):
        """On insert, append this comment's id to its article's comment list."""
        if not inserting:
            return
        ArticleComments.append_comment_to_article(redis_client, self.id,
                                                  self.article_id)
Esempio n. 15
0
class ArticleTime(JSONModel):
    """Sorted set at ``KEY`` holding article ids scored by a timestamp."""
    article_id = IntegerProperty()
    time = IntegerProperty()

    @classmethod
    def get_article_ids_for_page(cls,
                                 page,
                                 page_size=CONFIG.ARTICLES_PER_PAGE):
        """Return the article ids of one page, highest score first.

        A non-positive ``page_size`` returns every id with a score of at
        least 0. Pages below 1 are treated as page 1.
        """
        client = cls.redis_client
        if page_size <= 0:
            return client.zrevrangebyscore(cls.KEY, '+inf', 0)
        if page < 1:
            page = 1
        offset = (page - 1) * page_size
        return client.zrevrangebyscore(cls.KEY, '+inf', 0, offset, page_size)

    @classmethod
    def get_article_ids(cls,
                        cursor=None,
                        with_time=True,
                        limit=CONFIG.ARTICLES_PER_PAGE):
        """Return up to ``limit`` entries, highest score first.

        Without a cursor the first ``limit`` entries are returned; with a
        cursor only entries scored strictly below it are returned. Scores
        are included (cast to int) when ``with_time`` is true.
        """
        score_options = dict(withscores=with_time, score_cast_func=int)
        if cursor is not None:
            # '(' makes the upper bound exclusive.
            upper_bound = '(%d' % cursor
            return cls.redis_client.zrevrangebyscore(cls.KEY, upper_bound, 0,
                                                     0, limit,
                                                     **score_options)
        return cls.redis_client.zrevrange(cls.KEY, 0, limit - 1,
                                          **score_options)

    @classmethod
    def get_previous_article_id(cls, publish_time):
        """Return the id of the entry scored closest below ``publish_time``, or None."""
        upper_bound = '(%d' % publish_time
        found = cls.redis_client.zrevrangebyscore(cls.KEY, upper_bound, 0, 0,
                                                  1)
        return int(found[0]) if found else None

    @classmethod
    def get_next_article_id(cls, publish_time):
        """Return the id of the entry scored closest above ``publish_time``, or None."""
        lower_bound = '(%d' % publish_time
        found = cls.redis_client.zrangebyscore(cls.KEY, lower_bound, '+inf',
                                               0, 1)
        return int(found[0]) if found else None

    @classmethod
    def get_article_ids_by_data(cls, date):
        """Return ids scored within one day before to one day after ``date``.

        ``date`` is parsed with ``parse_date_for_url``; an empty list is
        returned when parsing yields a falsy value.
        """
        parsed = parse_date_for_url(date)
        if not parsed:
            return []
        local_dt = parsed.replace(tzinfo=LOCAL_TIMEZONE)
        base_time = datetime_to_timestamp(local_dt)
        upper = base_time + SECONDS_IN_A_DAY
        lower = base_time - SECONDS_IN_A_DAY
        matches = cls.redis_client.zrangebyscore(cls.KEY, lower, upper)
        return [int(match) for match in matches]

    @classmethod
    def get_count(cls):
        """Return the number of entries in the set, 0 when ZCARD yields a falsy value."""
        return cls.redis_client.zcard(cls.KEY) or 0

    def _save_self(self, redis_client, inserting=False):
        """Add the article scored by ``time``, or remove it when ``time`` is falsy.

        Nothing is written when ``article_id`` is falsy.
        """
        if not self.article_id:
            return
        if self.time:
            redis_client.zadd(self.KEY, self.time, self.article_id)
        else:
            redis_client.zrem(self.KEY, self.article_id)
Esempio n. 16
0
class Article(PublicModel):
    """Blog article, including maintenance of the indexes that refer to it.

    Saving an article also updates its URL mapping, category/tag/keyword
    index entries and the publish/update time sets (see ``_save_relative``).
    """
    title = StringProperty()
    url = StringProperty()
    content = StringProperty()
    format = IntegerProperty()
    category = StringProperty()
    tags = ListProperty()
    keywords = StringProperty()
    public = BooleanProperty()
    pub_time = DateTimeProperty(auto_now=True)
    mod_time = DateTimeProperty(auto_now=True)

    def quoted_url(self):
        """Return the article URL passed through ``quoted_string``."""
        return quoted_string(self.url)

    def category_name(self):
        """Return the name part of the category path, or None without a category."""
        if self.category:
            return Category.get_parent_path_and_name(self.category)[1]

    def html_summary(self):
        """Return the formatted summary.

        The summary is the text before the first summary delimiter (either
        variant), or the whole content when no delimiter is present.
        """
        content = self.content
        if CONFIG.SUMMARY_DELIMETER.search(content):
            summary = CONFIG.SUMMARY_DELIMETER.split(content, 1)[0]
        elif CONFIG.SUMMARY_DELIMETER2.search(content):
            summary = CONFIG.SUMMARY_DELIMETER2.split(content, 1)[0]
        else:
            summary = content
        return format_content(summary, self.format)

    def html_content(self):
        """Return the formatted full content.

        The first ``SUMMARY_DELIMETER`` match is removed from the content;
        with ``SUMMARY_DELIMETER2`` only the text after the first match is
        kept.
        """
        content = self.content
        if CONFIG.SUMMARY_DELIMETER.search(content):
            content = CONFIG.SUMMARY_DELIMETER.sub('', content, 1)
        elif CONFIG.SUMMARY_DELIMETER2.search(content):
            content = CONFIG.SUMMARY_DELIMETER2.split(content, 1)[1]
        return format_content(content, self.format)

    def _get_relative_keys(self, inserting=False):
        """Return the Redis keys of the related indexes this save may touch.

        The three time-set keys are always included. The URL, category, tag
        and keyword keys are included on insert when the attribute is set,
        and on update when the attribute differs from ``_origin_data``.
        """
        relative_keys = [
            PublicArticlePublishTime.KEY, ArticleUpdateTime.KEY,
            PrivateArticlePublishTime.KEY
        ]
        if inserting:
            relative_keys.append(ArticleURL.KEY)
            if self.category:
                relative_keys.extend(
                    [Category.KEY, CategoryArticle.KEY % self.category])
            if self.tags:
                relative_keys.extend(
                    [TagArticle.KEY % tag for tag in self.tags])
            if self.keywords:
                relative_keys.append(KeywordArticle.KEY)
        else:
            origin_data = self._origin_data

            old_url = origin_data.get('url')
            if old_url and old_url != self.url:
                relative_keys.append(ArticleURL.KEY)

            old_category = origin_data.get('category') or ''
            if old_category != self.category:
                relative_keys.append(Category.KEY)
                if old_category:
                    relative_keys.append(CategoryArticle.KEY % old_category)
                if self.category:
                    relative_keys.append(CategoryArticle.KEY % self.category)

            # Treat missing tags on either side as an empty list so building
            # the union below never fails on None.
            old_tags = origin_data.get('tags') or []
            new_tags = self.tags or []
            if old_tags != new_tags:
                relative_keys.extend([
                    TagArticle.KEY % tag
                    for tag in set(new_tags) | set(old_tags)
                ])

            old_keywords = origin_data.get('keywords')
            if old_keywords != self.keywords:
                relative_keys.append(KeywordArticle.KEY)
        return relative_keys

    def _save_relative(self, redis_client, inserting=False):
        """Write the index entries that depend on this article.

        On insert every set attribute gets its entry. On update only the
        attributes that differ from ``_origin_data`` are rewritten: new
        entries are added and the old ones removed. Finally the article is
        placed in the public or the private time sets according to
        ``public`` and removed from the other side.
        """
        if inserting:
            ArticleURL(url=self.url, article_id=self.id).save(redis_client,
                                                              inserting=True)
            if self.category:
                CategoryArticle(category=self.category,
                                article_id=self.id,
                                time=self.pub_time).save(redis_client,
                                                         inserting=True)
            if self.tags:
                for tag_name in self.tags:
                    TagArticle(tag=tag_name,
                               article_id=self.id,
                               time=self.pub_time).save(redis_client,
                                                        inserting=True)
            if self.keywords:
                KeywordArticle(keywords=self.keywords,
                               article_id=self.id).save(redis_client,
                                                        inserting=True)
        else:
            origin_data = self._origin_data

            old_url = origin_data.get('url')
            if old_url and old_url != self.url:
                ArticleURL(url=self.url,
                           article_id=self.id).save(redis_client,
                                                    inserting=True)
                # Saving with article_id=None deletes the old URL mapping.
                ArticleURL(url=old_url, article_id=None).save(redis_client)

            old_category = origin_data.get('category')
            if old_category != self.category:
                cache_keys = []
                if self.category:
                    CategoryArticle(category=self.category,
                                    article_id=self.id,
                                    time=self.pub_time).save(redis_client,
                                                             inserting=True)
                    cache_keys.append(CategoryArticles.KEY % self.category)
                if old_category:
                    # Saving with time=None removes the article from the set.
                    CategoryArticle(category=old_category,
                                    article_id=self.id,
                                    time=None).save(redis_client)
                    cache_keys.append(CategoryArticles.KEY % old_category)
                if cache_keys:
                    redis_client.delete(*cache_keys)

            # Normalise missing tags to [] on both sides, matching
            # _get_relative_keys; set(None) would raise TypeError.
            old_tags = origin_data.get('tags') or []
            new_tags = self.tags or []
            if old_tags != new_tags:
                old_tag_set = set(old_tags)
                tag_set = set(new_tags)
                added_tag_set = tag_set - old_tag_set
                removed_tag_set = old_tag_set - tag_set
                for tag_name in added_tag_set:
                    TagArticle(tag=tag_name,
                               article_id=self.id,
                               time=self.pub_time).save(redis_client,
                                                        inserting=True)
                for tag_name in removed_tag_set:
                    TagArticle(tag=tag_name, article_id=self.id,
                               time=None).save(redis_client)

            old_keywords = origin_data.get('keywords')
            if old_keywords != self.keywords:
                if self.keywords:
                    KeywordArticle(keywords=self.keywords,
                                   article_id=self.id).save(redis_client,
                                                            inserting=True)
                if old_keywords:
                    KeywordArticle(keywords=old_keywords,
                                   article_id=self.id).delete(redis_client)

        if self.public:
            PublicArticlePublishTime(article_id=self.id,
                                     time=self.pub_time).save(redis_client)
            ArticleUpdateTime(article_id=self.id,
                              time=self.mod_time).save(redis_client)
            PrivateArticlePublishTime(article_id=self.id,
                                      time=None).save(redis_client)
        else:
            PublicArticlePublishTime(article_id=self.id,
                                     time=None).save(redis_client)
            ArticleUpdateTime(article_id=self.id, time=None).save(redis_client)
            PrivateArticlePublishTime(article_id=self.id,
                                      time=self.pub_time).save(redis_client)

    @classmethod
    def exist_url(cls, url):
        """Return True when ``url`` is mapped to an article id."""
        return ArticleURL.get_article_id_by_url(url) is not None

    @classmethod
    def get_by_url(cls, url):
        """Return the article mapped to ``url``, or None."""
        article_id = ArticleURL.get_article_id_by_url(url)
        if article_id:
            return cls.get_by_id(article_id)

    @classmethod
    def search(cls, date, url):
        """Return the quoted URL of the public article near ``date`` closest to ``url``.

        Returns None when no public article is found for the date. A single
        candidate is returned directly; otherwise the best match by
        ``get_close_matches`` is returned.
        """
        article_ids = PublicArticlePublishTime.get_article_ids_by_data(date)
        if article_ids:
            articles = Article.get_by_ids(article_ids, public_only=True)
            if articles:
                if len(articles) == 1:
                    return articles[0].quoted_url()
                urls = [article.quoted_url() for article in articles]
                # cutoff=0 accepts any similarity, so one URL is always
                # returned for a non-empty candidate list.
                matched_urls = get_close_matches(url, urls, 1, 0)
                return matched_urls[0]

    @classmethod
    def get_articles_and_next_cursor(cls,
                                     article_ids_with_time,
                                     public_only=True,
                                     limit=CONFIG.ARTICLES_PER_PAGE):
        """Return ``(articles, next_cursor)`` for ``(article_id, time)`` pairs.

        ``next_cursor`` is the time of the last pair when exactly ``limit``
        pairs were given (the page is full), otherwise None.
        """
        article_ids = [
            int(article_id) for article_id, timestamp in article_ids_with_time
        ]
        if len(article_ids) == limit:
            next_cursor = article_ids_with_time[-1][1]
        else:
            next_cursor = None
        articles = Article.get_by_ids(article_ids,
                                      filter_empty=True,
                                      public_only=public_only)
        return articles, next_cursor

    @classmethod
    def get_articles_for_homepage(cls,
                                  cursor=None,
                                  limit=CONFIG.ARTICLES_PER_PAGE):
        """Return ``(articles, next_cursor)`` of public articles, or ``([], None)``."""
        article_ids_with_time = PublicArticlePublishTime.get_article_ids(
            cursor, limit=limit)
        if article_ids_with_time:
            return cls.get_articles_and_next_cursor(article_ids_with_time,
                                                    limit=limit)
        return [], None

    @classmethod
    def get_unpublished_articles(cls,
                                 page,
                                 page_size=CONFIG.ARTICLES_PER_PAGE):
        """Return one page of articles from the private publish-time set."""
        article_ids = PrivateArticlePublishTime.get_article_ids_for_page(
            page, page_size)
        if article_ids:
            return cls.get_by_ids(article_ids, filter_empty=True)
        return []

    @classmethod
    def get_articles_count(cls, public=True):
        """Return the size of the public or the private publish-time set."""
        time_class = PublicArticlePublishTime if public else PrivateArticlePublishTime
        return time_class.get_count()

    @classmethod
    def get_articles_for_feed(cls, limit=CONFIG.ARTICLES_FOR_FEED):
        """Return up to ``limit`` public articles for the feed.

        Ids come from the update-time set when
        ``CONFIG.SORT_FEED_BY_UPDATE_TIME`` is set, otherwise from the public
        publish-time set.
        """
        if CONFIG.SORT_FEED_BY_UPDATE_TIME:
            article_ids = ArticleUpdateTime.get_article_ids_for_page(1, limit)
        else:
            article_ids = PublicArticlePublishTime.get_article_ids(
                None, with_time=False, limit=limit)
        if article_ids:
            return cls.get_by_ids(article_ids, public_only=True)
        return []

    def get_previous_article(self):
        """Return the article published just before this one in its time set, or None."""
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        article_id = time_class.get_previous_article_id(self.pub_time)
        if article_id:
            return Article.get_by_id(article_id)

    def get_next_article(self):
        """Return the article published just after this one in its time set, or None."""
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        article_id = time_class.get_next_article_id(self.pub_time)
        if article_id:
            return Article.get_by_id(article_id)

    def get_nearby_articles(self):
        """Return the previous and next articles as a two-item sequence.

        A missing neighbour is None. When both exist they are fetched with
        one ``get_by_ids`` call and its result is returned as is.
        """
        time_class = PublicArticlePublishTime if self.public else PrivateArticlePublishTime
        previous_article_id = time_class.get_previous_article_id(self.pub_time)
        next_article_id = time_class.get_next_article_id(self.pub_time)
        if previous_article_id:
            if next_article_id:
                return Article.get_by_ids(
                    (previous_article_id, next_article_id))
            else:
                return Article.get_by_id(previous_article_id), None
        else:
            if next_article_id:
                return None, Article.get_by_id(next_article_id)
            else:
                return None, None