Example #1
0
    def after_update(self, raw_post: Dict, values: SQLValuesToWrite, old_records: List[DataRecord], records: List[DataRecord]):
        """Write management-log entries for every changed column of a topic.

        Also bumps edit statistics when title/content changed, and updates
        board statistics when the topic was moved to another board.
        """
        for old_record, record in zip(old_records, records):
            # PEP 8 (E731): named nested functions instead of lambda assignments.
            def manage_try_add(column, op):
                return ManageLog.add_by_post_changed(
                    self, column, op, POST_TYPES.TOPIC, values, old_record, record
                )

            def manage_try_add_with_diff(column, op):
                return ManageLog.add_by_post_changed(
                    self, column, op, POST_TYPES.TOPIC, values, old_record, record, diff_func=diff
                )

            title_changed = manage_try_add('title', MOP.POST_TITLE_CHANGE)  # manage log: title edited
            content_changed = manage_try_add_with_diff('content', MOP.POST_CONTENT_CHANGE)  # manage log: content edited

            if title_changed or content_changed:
                post_stats_do_edit(record['id'], record['user_id'])
                # atomic increment of the topic's edit counter
                Topic.update(edit_count=Topic.edit_count + 1).where(Topic.id == record['id']).execute()

            manage_try_add('state', MOP.POST_STATE_CHANGE)  # manage log: state changed
            manage_try_add('visible', MOP.POST_VISIBLE_CHANGE)  # manage log: visibility changed
            manage_try_add('awesome', MOP.TOPIC_AWESOME_CHANGE)  # manage log: awesome flag set
            manage_try_add('sticky_weight', MOP.TOPIC_STICKY_WEIGHT_CHANGE)  # manage log: sticky weight changed
            manage_try_add('weight', MOP.TOPIC_WEIGHT_CHANGE)  # manage log: weight changed

            # manage log: topic moved to another board
            if manage_try_add('board_id', MOP.TOPIC_BOARD_MOVE):
                post_stats_topic_move(old_record['board_id'], record['board_id'], record['id'])
Example #2
0
def get_summary(topic_path,
                summary_size=100,
                oracle="accept_reject",
                summarizer="sume",
                parser=None,
                language="english",
                rouge_dir="rouge/RELEASE-1.5.5/"):
    """Produce a summary for the topic stored under *topic_path*.

    Only ``summarizer == "sume"`` actually returns a summary; any other
    value falls through to the fallback message at the bottom.
    NOTE(review): ``oracle``, ``parser`` and ``rouge_dir`` are unused here —
    presumably kept for interface parity with other runners; confirm.
    """

    # relativize the topic path!!!!
    if topic_path.startswith("/"):
        relative_path = re.search('^(/)(.*)$', topic_path).group(2)
    else:
        relative_path = topic_path

    # resolve relative to the per-user ~/.ukpsummarizer data directory
    resolved_topic_path = path.normpath(
        path.join(path.expanduser("~"), ".ukpsummarizer",
                  path.normpath(relative_path)))
    topic = Topic(resolved_topic_path)
    docs = topic.get_docs()
    models = topic.get_models()  # NOTE(review): unused in this function

    if summarizer == "sume":
        sw = SumeWrap(language)
        summary = sw(docs, summary_size)
        return summary
    elif summarizer == "custom_weights":
        # NOTE(review): this branch builds a SumeWrap but never uses it and
        # falls through to the fallback return below — looks unfinished.
        sw = SumeWrap(language)

    return "no summary for summarizer type %s" % summarizer
Example #3
0
 def _msg_data(self, msg):
     """Unified return format — enriches a raw message with topic and user info.

     Returns None when a referenced user can no longer be loaded.
     """
     def brief(u):
         # compact public view of a user record
         return {
             'id': u['id'],
             'nick': u['nick'],
             'portrait': u['portrait'],
             'type': u['user_type'],
         }

     data = Message._msg_data(msg)
     if data['link_id']:
         topic = Topic._topic_data(Topic.get(data['link_id']))
         data['topic'] = topic
         tmp_user = model.user.get_user(user_id=topic['user_id'])  # message source user
         if not tmp_user:
             return None
         # the trailing "if tmp_user else {}" in the original was dead code:
         # tmp_user is guaranteed truthy after the guard above.
         data['topic_user'] = brief(tmp_user)
     if data['from_uid']:
         tmp_user = model.user.get_user(user_id=data['from_uid'])  # message source user
         if not tmp_user:
             return None
         data['from_user'] = brief(tmp_user)
     data['class'] = 'message'
     return data
Example #4
0
 def get(self, id):
     """Fetch one topic row by primary key and map it to a Topic model.

     Returns None when no row matches.
     """
     # Fix: the original concatenated "...FROM topic" "WHERE..." without a
     # separating space, producing invalid SQL ("FROM topicWHERE id=%s").
     query = "SELECT id, name, url FROM topic WHERE id=%s"
     cursor = self.db.cursor()
     # Fix: DB-API execute() takes a sequence of parameters; (id) is just a
     # parenthesized scalar — it must be the one-element tuple (id,).
     cursor.execute(query, (id,))
     sql_result = cursor.fetchone()
     cursor.close()
     if sql_result is None:
         # no such topic — previously this crashed with a TypeError on indexing
         return None
     topic = Topic(sql_result[1], sql_result[2])
     topic.id = sql_result[0]
     return topic
Example #5
0
    def after_update(self, raw_post: Dict, values: SQLValuesToWrite,
                     old_records: List[DataRecord], records: List[DataRecord]):
        """Write management-log entries for each changed topic column.

        Content/title edits are logged unconditionally from *values*; the
        remaining columns are logged via ManageLog.add_by_post_changed, which
        compares old and new records. A board move also updates statistics.
        """
        for old_record, record in zip(old_records, records):
            if 'content' in values:
                # manage log: content edited
                ManageLog.new(self.current_user, self.current_role,
                              POST_TYPES.TOPIC, record['id'],
                              record['user_id'], MOP.TOPIC_CONTENT_CHANGE,
                              None)
                # atomic increment of the edit counter
                Topic.update(edit_count=Topic.edit_count +
                             1).where(Topic.id == record['id']).execute()

            if 'title' in values:
                # manage log: title edited
                ManageLog.new(self.current_user, self.current_role,
                              POST_TYPES.TOPIC, record['id'],
                              record['user_id'], MOP.TOPIC_TITLE_CHANGE, None)

            # manage log: state changed
            ManageLog.add_by_post_changed(self, 'state', MOP.POST_STATE_CHANGE,
                                          POST_TYPES.TOPIC, values, old_record,
                                          record)

            # manage log: visibility changed
            ManageLog.add_by_post_changed(self, 'visible',
                                          MOP.POST_VISIBLE_CHANGE,
                                          POST_TYPES.TOPIC, values, old_record,
                                          record)

            # manage log: topic moved to another board
            if ManageLog.add_by_post_changed(self, 'board_id',
                                             MOP.TOPIC_BOARD_MOVE,
                                             POST_TYPES.TOPIC, values,
                                             old_record, record):
                statistic_move_topic(old_record['board_id'],
                                     record['board_id'], record['id'])

            # manage log: awesome flag set
            ManageLog.add_by_post_changed(self, 'awesome',
                                          MOP.TOPIC_AWESOME_CHANGE,
                                          POST_TYPES.TOPIC, values, old_record,
                                          record)

            # manage log: sticky weight changed
            ManageLog.add_by_post_changed(self, 'sticky_weight',
                                          MOP.TOPIC_STICKY_WEIGHT_CHANGE,
                                          POST_TYPES.TOPIC, values, old_record,
                                          record)

            # manage log: weight changed
            ManageLog.add_by_post_changed(self, 'weight',
                                          MOP.TOPIC_WEIGHT_CHANGE,
                                          POST_TYPES.TOPIC, values, old_record,
                                          record)
Example #6
0
    def __init__(self, summary_file):
        """Locate this summary file within its topic's model list."""
        parent_dir, base_name = path.split(summary_file)
        # the topic directory sits one level above the summary file
        self.topic = Topic(path.normpath(path.join(parent_dir, "..")))

        self.idx = None
        for position, (model_file, _model_text) in enumerate(self.topic.get_models()):
            if model_file.startswith(summary_file):
                self.idx = position

        print(base_name, self.idx)
Example #7
0
 def get_paginated(self, page=1, items=20):
     """Return one page of Topic models, or False when the page is empty."""
     cursor = self.db.cursor()
     cursor.execute(
         "SELECT id, name, url FROM topic LIMIT %s OFFSET %s;",
         (items, items * (page - 1)),
     )
     rows = cursor.fetchall()
     cursor.close()
     if not rows:
         return False
     result = []
     for row_id, row_name, row_url in rows:
         topic = Topic(row_name, row_url)
         topic.id = row_id
         result.append(topic)
     return result
Example #8
0
 def check_topic(self, item_id):
     """Abort with HTTP 404 unless a topic with the given id exists."""
     if not item_id:
         raise HTTPError(404)
     topic_id = int(item_id)
     topic = Topic.get(topic_id)
     if not topic:
         raise HTTPError(404, 'can not found item with id=%d' % topic_id)
Example #9
0
File: forum.py Project: fy0/Icarus
 def get(self):
     """Render the recent-topics page of the forum index."""
     current_page = self.get_argument("p", "1")
     count, query = Topic.get_list()
     paged = pagination(count, query, config.TOPIC_PAGE_SIZE, current_page)
     self.render(
         "forum/recent.html",
         nav="index",
         topics_count=count,
         pagination=paged,
         page_url=self.request.path,
     )
Example #10
0
File: board.py Project: fy0/Icarus
 def lasted_topic(cls, board):
     """Return the newest Topic on *board*, or None when there is none."""
     from model.topic import Topic
     newest_first = (Topic.select()
                     .where(Topic.board == board)
                     .order_by(Topic.time.desc()))
     try:
         return newest_first.get()
     except Topic.DoesNotExist:
         return None
    def test_build_formatted_list_of_topics(self):
        """A topic added by title must appear in the service's full listing."""
        expected = Topic("expected", "", "", "")

        service.add(expected.title)
        listed = service.all()

        assert_that(listed).contains(expected)
Example #12
0
File: forum.py Project: fy0/Icarus
 def post(self):
     """Create a new topic from the submitted form, then redirect."""
     title = self.get_argument("title", "").strip()
     board = self.get_argument("board", None)
     content = self.get_argument("content", "").strip()
     title_ok = bool(title) and \
         config.TITLE_LENGTH_MIN <= len(title) <= config.TITLE_LENGTH_MAX
     if title_ok and Board.exists_by_pk(board):
         topic = Topic.new(title, self.current_user() or 0, board, content)
         self.redirect(url_for("topic", topic.id))
     else:
         # non-standard submission — redirect back without ceremony
         self.redirect(url_for("topic_new"))
Example #13
0
def post_stats_do_comment(related_type, related_id, comment_id):
    """Update post statistics after a new comment is created."""
    # Both the comment count and the last-comment id of the commented object
    # must change, so pass a callback that injects last_comment_id into the
    # pending update dict.  NOTE(review): the `where` parameter is unused.
    def func(update, where):
        update['last_comment_id'] = comment_id

    post_stats_incr(PostStats.comment_count, related_id, 1, cb=func)

    # If the commented object is a topic, bump its board's stats as well.
    if related_type == POST_TYPES.TOPIC:
        t = Topic.get_by_pk(related_id)
        post_stats_incr(PostStats.comment_count, t.board_id, 1, cb=func)
Example #14
0
File: forum.py Project: fy0/Icarus
 def get(self, topic_id):
     """Render a single topic page, or a 404 when the topic doesn't exist."""
     topic = Topic.get_by_pk(topic_id)
     if topic:
         topic.view_count_inc()
         # sidebar: up to 10 other topics by the same author
         count, user_topics = Topic.get_list_by_user(topic.user)
         user_topics = user_topics.limit(10)
         follow = JsDict()
         if self.current_user():
             # whether the viewer already follows this topic / its author
             follow.topic = Follow.exists(OBJECT_TYPES.TOPIC, topic_id, self.current_user())
             follow.author = Follow.exists(OBJECT_TYPES.USER, topic.user.id, self.current_user())
         self.render(
             "forum/topic.html",
             nav="index",
             page_title=page_title(topic.title, topic.board.title, "社区"),
             topic=topic,
             user_topics=user_topics,
             follow=follow,
         )
     else:
         self.write_error(404)
Example #15
0
File: user.py Project: fy0/Icarus
 def get(self, username):
     """Render a user's profile page with their topics, or 404 if unknown."""
     user = User.get_by_username(username)
     if not user:
         self.write_error(404)
         return
     page = self.get_argument('p', '1')
     count, query = Topic.get_list_by_user(user)
     paged = pagination(count, query, config.TOPIC_PAGE_SIZE, page)
     self.render('user/user_page.html',
                 user=user,
                 tab={'user_topic_page': 'active'},
                 count=count,
                 avatar_html=avatar_generate(username, user.avatar_color, 167),
                 pagination=paged,
                 page_url=self.request.path)
Example #16
0
File: forum.py Project: fy0/Icarus
    def get(self, topic_id):
        """Render the edit form for an existing topic."""
        topic = Topic.get_by_pk(topic_id)
        if not self.topic_check(topic):
            return

        title = page_title("编辑主题 - %s" % topic.title, topic.board.title, "社区")
        self.render(
            "forum/topic_edit.html",
            nav="index",
            page_title=title,
            topic=topic,
            boards=Board.get_list(),
        )
Example #17
0
def get_topics():
    """Scrape the ubuntuusers forum front page and store each topic found."""
    repo = TopicRepository(db)
    response = requests.get("https://forum.ubuntuusers.de/")
    html = response.content.decode('utf8')
    # capture (href, title) for each forum entry row
    entry_pattern = re.compile(
        r'<tr\sclass="entry.+?<a.{0,100}href="(.{0,100}?)".+?>(.+?)<',
        re.MULTILINE | re.DOTALL,
    )
    for match in entry_pattern.finditer(html):
        topic = Topic(match.group(2), match.group(1))
        repo.add(topic)
        print("Added Topic: {topic}".format(topic=topic.name))
    return
Example #18
0
File: topic.py Project: 1y1n/Icarus
    async def before_insert(self, raw_post: Dict,
                            values_lst: List[SQLValuesToWrite]):
        """Validate the new-topic form and fill in server-generated fields."""
        values = values_lst[0]
        form = TopicNewForm(**raw_post)
        if not form.validate():
            # abort the insert with the validation errors
            return self.finish(RETCODE.FAILED, form.errors)
        values['user_id'] = self.current_user.id

        # the following applies to all post types
        if not config.POST_ID_GENERATOR == config.AutoGenerator:
            # generate an application-level id instead of relying on the DB
            values['id'] = config.POST_ID_GENERATOR().digest()
        values['time'] = int(time.time())
        values['weight'] = Topic.weight_gen()
Example #19
0
def getTopic(topicId):
    """Load one topic (name plus its comics) from the database by id.

    Raises LookupError when no topic row matches *topicId*.
    """
    from model.topic import Topic
    c = get_db()
    t = text('SELECT topic_name FROM topic WHERE topic_id = :topicId')
    # Fix: `name` was unbound (NameError) when the query returned no rows;
    # initialize it and fail with a clear error instead.
    name = None
    for row in c.execute(t, topicId=topicId):
        name = row['topic_name']
    if name is None:
        raise LookupError('no topic with topic_id=%r' % (topicId,))

    tC = text('SELECT comic_id from topic_comic WHERE topic_id = :topicId')
    comics = [getComic(rowCom['comic_id'])
              for rowCom in c.execute(tC, topicId=topicId)]
    return Topic(topicId, name, comics)
Example #20
0
File: common.py Project: fy0/Icarus
    def get_object(cls, related_type, related_id):
        """Resolve (related_type, related_id) to its model instance, or None."""
        from model.user import User
        from model.topic import Topic
        from model.wiki import WikiItem

        # dispatch table instead of an if/elif chain
        model_by_type = {
            OBJECT_TYPES.USER: User,
            OBJECT_TYPES.TOPIC: Topic,
            OBJECT_TYPES.WIKI: WikiItem,
        }
        model = model_by_type.get(related_type)
        if model is not None:
            obj = model.get_by_pk(related_id)
            if obj:
                return obj
Example #21
0
File: forum.py Project: fy0/Icarus
    def post(self, topic_id):
        """Apply an edit submission to an existing topic."""
        topic = Topic.get_by_pk(topic_id)
        if not self.topic_check(topic):
            return

        title = self.get_argument("title", "").strip()
        content = self.get_argument("content", "").strip()

        title_ok = bool(title) and \
            config.TITLE_LENGTH_MIN <= len(title) <= config.TITLE_LENGTH_MAX
        if not title_ok:
            # non-standard submission — redirect back without ceremony
            self.redirect(url_for("topic_new"))
            return

        topic.edit({"title": title, "content": content}, self.current_user())
        self.messages.success("编辑成功")
        self.redirect(url_for("topic", topic.id))
Example #22
0
 def get(self, **kwargs):
     """Like a topic or a comment on behalf of *uid*.

     Creates the like (unless one exists in the cache), bumps the target's
     like counter, and creates a notification message for the owner.
     """
     uid = kwargs['uid']
     mode = kwargs['mode']
     item_id = int(kwargs['item_id'])
     user = hqby.user.get_user_info(uid=uid)
     if not user:
         raise HTTPError(403, 'can not found this user')
     if mode not in ['topic','comment']:
         raise HTTPError(400,'type not topic or comment')
     if mode == 'topic':
         # liking a topic: look for an existing like in the cache first;
         # only create a new like when none is found
         key = "0"+uid+str(item_id)
         zan = model.like.get_zan(uid,item_id,0,key)
         if not zan:
             # bump the topic's like_num, create the like, create the message
             item = Topic.update_topic(item_id, True, False)
             item_data = Topic._topic_data(item)
             zan = {'user_id':str(uid),'item_id':item_data['topic_id'],'item_type':0}
             zan = model.like.create_like(zan)
             msg = self.create_msg('TL', user, item_data)
             self.write({'status':1,'type':mode,'like_num':item_data['like_num'],'uid':uid,'like_id':zan['id'],'msg':msg})
         else:
             self.write({'status':0,'type':mode,'msg':'already liked'})
     elif mode == 'comment':
         # liking a comment: same cache-then-create pattern as above
         key = "1"+uid+str(item_id)
         zan = model.like.get_zan(uid,item_id,1,key)
         if not zan:
             item = Comment.update_comment(item_id,True)
             zan = {'user_id':str(uid),'item_id':item['id'],'item_type':1}
             zan = model.like.create_like(zan)
             msg = self.create_msg('CL', user, item)
             self.write({'status':1,'type':mode,'like_num':item['like_num'],'uid':uid,'like_id':zan['id'],'msg':msg})
         else:
             self.write({'status':0,'type':mode,'msg':'already liked'})
     return
Example #23
0
File: forum.py Project: fy0/Icarus
 def get(self, board_id):
     """Render a board page with its paginated topic list, or 404."""
     board = Board.get_by_pk(board_id)
     if not board:
         self.write_error(404)
         return
     page = self.get_argument("p", "1")
     count, query = Topic.get_list_by_board(board)
     paged = pagination(count, query, config.TOPIC_PAGE_SIZE, page)
     self.render(
         "forum/board.html",
         nav="index",
         page_title=page_title(board.title, "社区"),
         board=board,
         topics_count=count,
         pagination=paged,
         page_url=self.request.path,
     )
Example #24
0
 def save_to_topics(self, rankings, file_name):
     """Persist ranked (score, sentence) tuples into a Topic model.

     The first sentence accepted by ``topic.set_idea`` becomes the core
     idea; every sentence after that is stored as a supporting idea.
     Returns the saved Topic.
     """
     topic = Topic(file_name)
     found_core = False
     # idiomatic tuple unpacking instead of range(len(...)) indexing
     for score, sent in rankings:
         if not found_core:
             if topic.set_idea(sent, score):
                 found_core = True
             continue
         topic.add_supporting_idea(sent, score)
     topic.save_to_db()
     return topic
Example #25
0
class Summary(object):
    """One model summary, identified by its index within its Topic."""

    def __init__(self, summary_file):
        parent_dir, base_name = path.split(summary_file)
        # the topic directory sits one level above the summary file
        self.topic = Topic(path.normpath(path.join(parent_dir, "..")))

        self.idx = None
        for position, (model_file, _model_text) in enumerate(self.topic.get_models()):
            if model_file.startswith(summary_file):
                self.idx = position

        print(base_name, self.idx)

    def get_index(self):
        """Return this summary's index in the topic's model list."""
        return self.idx

    def get_topic(self):
        """Return the Topic this summary belongs to."""
        return self.topic
Example #26
0
def es_update_topic(id):
    """(Re)index one topic document into elasticsearch.

    Silently returns when the topic or its author cannot be loaded.
    """
    post: Topic = Topic.get_by_id(id)
    if not post: return
    u: User = User.get_by_id(post.user_id)
    if not u: return

    body = get_post_base_body(post)
    body.update({
        'user_nickname': u.nickname,
        'content': post.content,
        'brief': post.content[:100]  # first 100 chars as a preview snippet
    })
    es.index(
        index=INDEX_NAME,
        doc_type="doc",
        id=to_hex(post.id),
        body=body
    )
Example #27
0
def statistic_add_comment(related_type, related_id, comment_id):
    """Bump comment counters (total and 24h) after a comment is created."""
    # About atomic updates:
    # http://docs.peewee-orm.com/en/latest/peewee/querying.html#atomic-updates
    # s: Statistic = cls.get_by_pk(related_id)
    Statistic.update(last_comment_id=comment_id, comment_count=Statistic.comment_count + 1)\
        .where(Statistic.id == related_id)\
        .execute()
    Statistic24h.update(comment_count=Statistic24h.comment_count + 1)\
        .where(Statistic24h.id == related_id)\
        .execute()

    # when a topic is commented, its board's counters must move as well
    if related_type == POST_TYPES.TOPIC:
        t = Topic.get_by_pk(related_id)
        Statistic.update(last_comment_id=comment_id, comment_count=Statistic.comment_count + 1)\
            .where(Statistic.id == t.board_id)\
            .execute()
        Statistic24h.update(comment_count=Statistic24h.comment_count + 1)\
            .where(Statistic24h.id == t.board_id)\
            .execute()
Example #28
0
def update_all(reset=False):
    """Re-index every topic, wiki article and comment into elasticsearch.

    When *reset* is true, the index is dropped (if present) and recreated
    before re-indexing.
    """
    if reset:
        try:
            es.indices.delete(index=INDEX_NAME)
        except elasticsearch.exceptions.NotFoundError:
            pass
        create_index()

    for topic in Topic.select(Topic.id):
        print('topic', to_hex(topic.id))
        es_update_topic(topic.id)

    for article in WikiArticle.select(WikiArticle.id):
        print('wiki', to_hex(article.id))
        es_update_wiki(article.id)

    for comment in Comment.select(Comment.id):
        print('comment', to_hex(comment.id))
        es_update_comment(comment.id)
Example #29
0
def statistic_add_topic_click(topic_id, board_id=None):
    """Increment click counters (total and 24h) for a topic and its board.

    *board_id* is looked up from the topic when not supplied.
    """
    def _incr_click(pk):
        # one atomic increment on each statistics table
        Statistic.update(click_count=Statistic.click_count + 1)\
            .where(Statistic.id == pk)\
            .execute()
        Statistic24h.update(click_count=Statistic24h.click_count + 1)\
            .where(Statistic24h.id == pk)\
            .execute()

    _incr_click(topic_id)

    if not board_id:
        board_id = Topic.get_by_pk(topic_id).board_id

    _incr_click(board_id)
Example #30
0
    def get_post(cls, related_type, related_id):
        """Resolve (related_type, related_id) to a post object, or None.

        Accepts *related_id* as a POST_ID_GENERATOR instance and
        *related_type* as a numeric string; both are normalized first.
        """
        from model.user import User
        from model.topic import Topic
        from model.wiki import WikiItem

        # isinstance() instead of type() == comparisons (idiomatic type check)
        if isinstance(related_id, POST_ID_GENERATOR):
            related_id = related_id.to_bin()

        if isinstance(related_type, str):
            related_type = int(related_type)

        if related_type == POST_TYPES.USER:
            u = User.get_by_pk(related_id)
            if u: return u
        elif related_type == POST_TYPES.TOPIC:
            t = Topic.get_by_pk(related_id)
            if t: return t
        elif related_type == POST_TYPES.WIKI:
            w = WikiItem.get_by_pk(related_id)
            if w: return w
Example #31
0
File: board.py Project: fy0/Icarus
 def topic_count(cls, board, start_time=0):
     """Count topics on *board* created at or after *start_time*."""
     from model.topic import Topic  # TOPIC_STATE import dropped: it was unused
     return Topic.select().where(Topic.time >= start_time, Topic.board == board).count()
Example #32
0
            # is_dataset
            d = DataSet(f)
            # unroll to get topics
            for t in d.get_topics():
                for (mf, mt) in t.get_models():
                    mf = path.normpath(mf)
                    pref = path.commonprefix([mf, iobasedir])
                    tn = mf[len(pref) + 1:]
                    print("shortened:", tn)
                    queue.append(mf)

                    # topics.append([t.get_name for t in d.get_topics()])

        elif path.exists(path.join(f, "task.json")):
            # is topic
            t = Topic(f)
            for (mf, mt) in t.get_models():
                mf = path.normpath(mf)
                pref = path.commonprefix([mf, iobasedir])
                tn = mf[len(pref) + 1:]
                print("shortened:", tn)
                queue.append(mf)
        elif path.exists(path.join(f, "..", "..", "task.json")) \
                and path.exists(f):
            # should be model
            queue.append(f)
        else:
            raise BaseException("Invalid file given.", f, " is neither a dataset nor a topic nor a model.")

        if args.max_models:
            queue = queue[:args.max_models]
 def add(self, topic_title):
     """Append a new Topic with the given title and return the full list."""
     new_topic = Topic(topic_title, "", "", "", "")
     self.topics.append(new_topic)
     return self.topics
Example #34
0
    def run(self, topic_path, size=None, max_iteration_count=25):
        """Grid-search feedback-graph hyperparameters on a single topic.

        Shuffles the interpretation types and, per type, all hyperparameter
        combinations; each combination launches a SingleTopicRunner whose
        scores land under ``self.scores_dir/<sha224(topic_path)>``.
        NOTE(review): ``max_iteration_count`` is unused in this body.
        """
        log = logging.getLogger("GridSearch")

        interpretation_types = [
            'SimpleNgramFeedbackGraph',
            'WordEmbeddingGaussianFeedbackGraph',
            'BaselineFeedbackStore',
            'WordEmbeddingRandomWalkDiffusionFeedbackGraph',
            #            'WordEmbeddingEgoPrFeedbackGraph',
            #            'PageRankFeedbackGraph',
        ]

        random.shuffle(interpretation_types)

        # relativize absolute paths so they resolve under self.iobasedir
        if topic_path.startswith("/"):
            relative_path = re.search('^(/)(.*)$', topic_path).group(2)
        else:
            relative_path = topic_path

        topic = Topic(path.join(self.iobasedir, path.normpath(relative_path)))

        embeddings = self.__get_embeddings__(topic.get_language())
        # NOTE(review): hashlib.sha224 requires bytes on Python 3 — this line
        # raises TypeError unless topic_path is bytes; likely needs .encode().
        run_id = hashlib.sha224(topic_path).hexdigest()
        outputdir = path.join(self.scores_dir, run_id)
        try:
            os.mkdir(outputdir)
        except:
            # NOTE(review): bare except hides real failures; intent appears to
            # be "ignore if the directory already exists".
            pass

        concept_embedder = ConceptEmbedder(embeddings)
        for itype in interpretation_types:
            if itype == 'WordEmbeddingGaussianFeedbackGraph':
                mass_reject = [4.0, 1.0, 0.0, -1.0, -4.0]
                mass_accept = [4.0, 1.0, 0.0, -1.0, -4.0]
                iterations_accept = [16, 128, 1024]
                iterations_reject = [2, 4, 8, 16, 64]
                cut_off_threshold = [0.998, 0.98, 0.9, 0.6, 0.4]

                combinations = list(
                    itertools.product(mass_reject, mass_accept,
                                      iterations_accept, iterations_reject,
                                      cut_off_threshold))
                random.shuffle(combinations)

                for (mr, ma, ia, ir, co) in combinations:
                    log.info(
                        "WordEmbeddingGaussianFeedbackGraph: %s %s %s %s %s" %
                        (mr, ma, ia, ir, co))
                    g = WordEmbeddingGaussianFeedbackGraph(
                        concept_embedder,
                        cut_off_threshold=co,
                        mass_reject=mr,
                        mass_accept=ma,
                        iterations_reject=ir,
                        iterations_accept=ia)

                    sir = SingleTopicRunner(self.iobasedir,
                                            self.rouge,
                                            scores_dir=outputdir)
                    sir.run(topic_path,
                            size,
                            feedbackstore=g,
                            summarizer="PROPAGATION",
                            preload_embeddings=embeddings)

            elif itype == 'WordEmbeddingRandomWalkDiffusionFeedbackGraph':
                mass_reject = [4.0, 1.0, 0.0, -1.0, -4.0]
                mass_accept = [4.0, 1.0, 0.0, -1.0, -4.0]
                iterations_accept = [128, 1024, 10000]
                iterations_reject = [64, 200, 5000]
                cut_off_threshold = [0.998, 0.98, 0.9, 0.6, 0.4]
                propagation_abort_threshold = [0.01, 0.1, 0.25, 0.5, 0.75, 0.9]

                combinations = list(
                    itertools.product(mass_reject, mass_accept,
                                      iterations_accept, iterations_reject,
                                      cut_off_threshold,
                                      propagation_abort_threshold))
                random.shuffle(combinations)

                for (mr, ma, ia, ir, co, pat) in combinations:
                    log.info(
                        "WordEmbeddingRandomWalkDiffusionFeedbackGraph: %s %s %s %s %s %s"
                        % (mr, ma, ia, ir, co, pat))
                    g = WordEmbeddingRandomWalkDiffusionFeedbackGraph(
                        concept_embedder,
                        mass_accept=ma,
                        mass_reject=mr,
                        iterations_accept=ia,
                        iterations_reject=ir,
                        cut_off_threshold=co,
                        propagation_abort_threshold=pat)

                    sir = SingleTopicRunner(self.iobasedir,
                                            self.rouge,
                                            scores_dir=outputdir)
                    sir.run(topic_path,
                            size,
                            feedbackstore=g,
                            summarizer="PROPAGATION",
                            preload_embeddings=embeddings)

            elif itype == "BaselineFeedbackStore":
                # baseline: no feedback graph, a single run
                log.info("BaselineFeedbackStore")

                sir = SingleTopicRunner(self.iobasedir,
                                        self.rouge,
                                        scores_dir=outputdir)
                sir.run(topic_path,
                        size,
                        summarizer="PROPAGATION",
                        preload_embeddings=embeddings)

            elif itype == "PageRankFeedbackGraph":
                log.warning("interpretationtype not implementend. type: %s" %
                            (itype))
            elif itype == "SimpleNgramFeedbackGraph":
                window_size = [2, 3, 4, 5]
                factor_rejects = [1, 0, 0.05, 0.25, 0.5, 2, 4, 8]
                factor_accepts = [1, 0, 0.05, 0.25, 0.5, 2, 4, 8]
                stemmer = SnowballStemmer(topic.get_language())
                combinations = list(
                    itertools.product(window_size, factor_rejects,
                                      factor_accepts))
                random.shuffle(combinations)

                for (ws, fr, fa) in combinations:
                    log.info(
                        "SimpleNgramFeedbackGraph: (ws %s, fr %s, fa %s)" %
                        (ws, fr, fa))
                    g = SimpleNgramFeedbackGraph(stemmer,
                                                 topic.get_language(),
                                                 N=ws,
                                                 factor_reject=fr,
                                                 factor_accept=fa)

                    sir = SingleTopicRunner(self.iobasedir,
                                            self.rouge,
                                            scores_dir=outputdir)
                    sir.run(topic_path,
                            size,
                            feedbackstore=g,
                            summarizer="PROPAGATION",
                            preload_embeddings=embeddings)
            else:
                log.warning("Got wrong interpretationtype. ignoring type %s" %
                            (itype))
Example #35
0
 def get_topics(self):
     """Yield a Topic for every subdirectory of the dataset root."""
     for entry in os.listdir(self.root):
         candidate = path.normpath(path.join(self.root, entry))
         if os.path.isdir(candidate):
             yield Topic(candidate)
Example #36
0
    def post(self, **kwargs):
        """Create a new comment; requires a logged-in user.

        Side effects: bumps the topic's comment counter, creates an in-app
        message for the comment/topic owner, and enqueues a Baidu push
        notification when the target user has a bound push channel.
        """
        token = self._get_token()
        uid = token['_id']
        user = hqby.user.get_user_info(uid=uid)
        if not user:
            raise HTTPError(403, 'can not found this user')
        try:
            params = json_decode(self.request.body)
        except ValueError:
            # body was not JSON; fall back to form parameters
            params = self.get_form()
        comment_type = params.get('comment_type', False)
        self.check_comment_type(comment_type)
        self.check_topic(params['topic_id'])
        params['user_id'] = uid
        # create the new comment
        comm = self.create_comment(params,comment_type)
        # self.write('haha = %s'%comm)
        # return
        data = self._comment_data(comm)
        data['is_liked'] = 0
        topic = Topic.update_topic(data['topic_id'],False,True)
        topic_data = Topic._topic_data(topic)
        if data['to_user_id']:
            # replying to another comment
            to_uid = data['to_user_id']
            msg_type = 'CC'
            content = user['nick'] + '回复了你的评论 ' 
        else:
            # commenting directly on the topic
            to_uid = topic_data['user_id']
            msg_type = 'TC'
            content = user['nick'] + "评论了你的心得 " + "  \"%s\""%topic_data['content'] 
        if uid != to_uid:
            msg = Message.set_message(to_uid, msg_type, uid, content, topic_data['topic_id']) # link_id is the topic's id
            msg = Message._msg_data(msg)
        else:
            msg = 'you reply yourself'
        # push a notification to the user being replied to
        baidu_apiKey = baidu_push_configs['apiKey']
        baidu_secretKey = baidu_push_configs['secretKey']
        bind_info = hqby.user.get_bind_info(to_uid)
        baidu_uid, baidu_cid = bind_info.get('baidu_uid'), bind_info.get('baidu_cid')
        if baidu_uid and baidu_cid and uid != to_uid:
            # NOTE(review): .encode('utf8') inside %-formatting looks Python 2
            # era; on Python 3 it would interpolate a bytes repr — confirm.
            message = {
                'title': '读经',
                'description': '%s回复了你,快去看看吧' % user['nick'].encode('utf8'),
                'open_type': 2,
                "aps": {
                    "alert": '%s回复了你,快去看看吧' % user['nick'].encode('utf8'),
                    "sound":"",
                    "badge":0
                    },
                }
            message = json.dumps(message)
            message_key = "sys"
            c = Channel(baidu_apiKey, baidu_secretKey, arr_curlOpts=dict(TIMEOUT=3, CONNECTTIMEOUT=5))
            push_type = 1   # 1 = single user, 2 = group, 3 = everyone
            optional = dict()
            optional[Channel.USER_ID] = baidu_uid
            optional[Channel.CHANNEL_ID] = int(baidu_cid)
            optional[Channel.MESSAGE_TYPE] = 1    # 0 = message, 1 = notification
            optional['device_types'] = [3, 4]      # 4=ios, 3=android, 5=wp, 2=pc, 1=browser
            optional['deploy_status'] = 1 if configs['debug'] else 2     # 1 = dev, 2 = production
            #job = c.pushMessage(push_type, message, message_key, optional)
            job = rq_client.default_queue.enqueue(c.pushMessage, push_type, message, message_key, optional)
            #logging.info('log for baidu pusher: %s', str(job))
        self.write({'comment':data,'topic':topic_data,'msg':msg})
        self.set_status(200)
        self.set_header('Content-Type', self._ct('json'))
Example #37
0
def post_stats_add_topic_click(topic_id, board_id=None):
    """Record one click on a topic, counting it on both the topic and its board.

    :param topic_id: primary key of the clicked topic
    :param board_id: board the topic belongs to; looked up from the topic
        record when not supplied.
    """
    # Use an identity check against None (not truthiness) so a legitimate
    # falsy board id such as 0 is never silently re-resolved.
    if board_id is None:
        t = Topic.get_by_pk(topic_id)
        board_id = t.board_id
    post_stats_incr(PostStats.click_count, topic_id)
    post_stats_incr(PostStats.click_count, board_id)
Example #38
0
    def run(self,
            topic_path,
            size=None,
            summarizer="SUME",
            summary_idx=None,
            parser=None,
            oracle="accept",
            feedback_log=None,
            propagation=False,
            max_iteration_count=10,
            preload_embeddings=None,
            feedbackstore=None,
            override_results_files=False,
            num_clusters=8):
        """Run one summarization experiment on a single topic.

        :param topic_path: a ``Topic`` instance, or a path to the topic
            directory (an absolute path is re-rooted below ``self.iobasedir``).
        :param size: summary size; defaults to the topic's own summary size.
        :param summarizer: one of "SUME", "UPPER_BOUND" or "PROPAGATION".
        :param summary_idx: when given, restrict to that one reference summary.
        :param parser: parser type forwarded to ``SimulatedFeedback``.
        :param oracle: oracle type forwarded to ``SimulatedFeedback``.
        :param feedback_log: file with previously recorded feedback to replay.
        :param propagation: unused here -- TODO confirm and remove.
        :param max_iteration_count: iteration cap for the simulation.
        :param preload_embeddings: currently ignored; embedding loading is
            disabled (see the note below).
        :param feedbackstore: feedback store; when None a placeholder config
            is reported in the run id.
        :param override_results_files: unused; ``self.override_results_switch``
            controls result-skipping instead -- TODO confirm.
        :param num_clusters: cluster count (only referenced by the
            commented-out clustering code).
        :raises ValueError: if ``summarizer`` is not a known type.
        """
        log = logging.getLogger("SingleTopicRunner")

        sf = None  # stays None unless the PROPAGATION branch builds a SimulatedFeedback
        self.tlog.debug("SingleTopicRunner started")

        # Resolve the topic: accept a ready Topic instance, or a path that is
        # relativized (one leading "/" stripped) and rooted under iobasedir.
        if isinstance(topic_path, Topic):
            topic = topic_path
        else:
            if topic_path.startswith("/"):
                relative_path = topic_path[1:]
            else:
                relative_path = topic_path
            topic = Topic(
                path.join(self.iobasedir, path.normpath(relative_path)))
        language = topic.get_language()
        docs = topic.get_docs()
        summaries = topic.get_models()

        # Replay previously recorded feedback so the simulation resumes where
        # the provided log left off (flightrecorder.records holds those
        # earlier iterations).
        flightrecorder = get_flightrecorder_from_file(feedback_log)

        # NOTE(review): embedding preloading is deliberately disabled; the
        # previous implementation loaded w2v embeddings from
        # <iobasedir>/embeddings when preload_embeddings was set.

        if summary_idx is not None:
            summaries = [summaries[summary_idx]]

        # Fall back to the topic's own target size when none was requested.
        use_size = topic.get_summary_size() if size is None else size

        # clusters_path = path.join(self.iobasedir, 'clustering', str(num_clusters))
        # clusters = get_clusters(clusters_path, topic.docs_dir)

        def _emit_plain_result(summary_text):
            # Persist a {summary, type, info_data} record to self.out (when
            # set) and always to the shared tmp location.
            contents = {
                "summary": summary_text,
                "type": summarizer,
                "info_data": []
            }
            json_content = json.dumps(contents)
            if self.out is not None:
                log.info("writing output to %s" % (self.out))
                write_to_file(json_content, self.out)
            write_to_file(
                json_content,
                path.normpath(
                    path.expanduser(
                        path.join(self.iobasedir, "tmp", "tmp.json"))))

        if summarizer == "SUME":
            sw = SumeWrap(language)
            summary = sw(docs, use_size)
            _emit_plain_result(summary)
        elif summarizer == "UPPER_BOUND":
            ub_summary = load_ub_summary(language,
                                         docs,
                                         summaries,
                                         use_size,
                                         base_dir=self.iobasedir)
            summary = '\n'.join(ub_summary)
            _emit_plain_result(summary)
        elif summarizer == "PROPAGATION":
            # Upper bound over all reference summaries gives the ROUGE ceiling.
            ub_summary = load_ub_summary(language,
                                         docs,
                                         summaries,
                                         use_size,
                                         base_dir=self.iobasedir)
            summary = '\n'.join(ub_summary)
            ub_scores = self.rouge(summary, summaries, use_size)

            log.debug(
                "UB scores: R1:%s R2:%s SU4:%s" %
                (str(ub_scores[0]), str(ub_scores[1]), str(ub_scores[2])))

            ref_summ = random.choice(summaries)

            parse_info = []
            # parse_info = topic.get_parse_info(summaries.index(ref_summ))

            # Configuration switches for the simulated-feedback algorithm.
            run_config = {
                'rank_subset': True,
                'relative_k': True,
                'dynamic_k': False,
                'adaptive_sampling': False,
                'strategy': False,
            }

            r = 0  # adaptive window size (disabled)
            clusters = None
            log.info("recording k_size in summarize %f", self.k)
            # All reference summaries (not just one) are given as models.
            sf = SimulatedFeedback(
                language,
                self.rouge,
                embeddings=None,  # TODO: wire up preload_embeddings
                docs=docs,
                models=summaries,
                summary_length=use_size,
                oracle_type=oracle,
                ub_score=ub_scores,
                ub_summary=ub_summary,
                parser_type=parser,
                flightrecorder=flightrecorder,
                feedbackstore=feedbackstore,
                parse_info=parse_info,
                run_config=run_config,
                k=self.k,
                adaptive_window_size=r,
                clusters=clusters)

            # Derive a stable run id from the configuration.
            if sf.embeddings is None or sf.embeddings == {}:
                # BUG FIX: the original had a trailing comma here, turning the
                # value into the 1-tuple ("none",) and corrupting the run id.
                embe_var = "none"
            elif sf.embeddings.embedding_variant is None:
                embe_var = "none"
            else:
                embe_var = sf.embeddings.embedding_variant

            if feedbackstore is None:
                cfg = {"type": "Unconfigured default"}
            else:
                cfg = feedbackstore.get_config()

            rs = []
            # ref_summ iterates as (path, text) pairs -- TODO confirm shape.
            for p, t in [ref_summ]:
                rs.append({"name": os.path.split(p)[1], "text": t})

            run_id_string = "%s-%s-%s-%s-%s-%s-%s-%s" % (
                oracle, summarizer, parser, embe_var, topic.get_dataset(),
                topic.get_name(), [item["name"]
                                   for item in rs], json.dumps(cfg))

            # BUG FIX: hashlib needs bytes; passing str raises TypeError on
            # Python 3, so encode explicitly.
            run_id = hashlib.sha224(run_id_string.encode("utf-8")).hexdigest()
            filename = path.join(self.scores_storage_path,
                                 "result-%s.json" % (run_id))

            # Skip runs whose result file already exists, unless explicit
            # output was requested or overriding is switched on.
            if (os.path.exists(filename) and self.out is None
                    and self.override_results_switch is False):
                log.info(
                    "Skipping run_id '%s' because the result file does already exist. config: %s"
                    % (run_id, run_id_string))
                return
            else:
                log.info("Doing %s iterations for run_id '%s'\n %s" %
                         (max_iteration_count, run_id, run_id_string))
                write_to_file("", filename)

            summary, confirmatory_summary, exploratory_summary = sf.run_full_simulation(
                max_iteration_count=max_iteration_count)

            recommendations, recom_sentences = sf.get_recommendations()

            # Flatten per-iteration feedback into a table-like record list.
            derived_records = []
            for i, record in enumerate(sf.flight_recorder.records):
                for concept in record.accept:
                    derived_records.append({
                        "iteration": i,
                        "concept": concept,
                        "value": "accept"
                    })
                for concept in record.reject:
                    derived_records.append({
                        "iteration": i,
                        "concept": concept,
                        "value": "reject"
                    })
                for concept in record.implicit_reject:
                    derived_records.append({
                        "iteration": i,
                        "concept": concept,
                        "value": "implicit_reject"
                    })

            # Recommendations are recorded with iteration -1 so they sort
            # apart from real feedback rounds. ("uncertainity" [sic] is an
            # established output key; keep the spelling for consumers.)
            for item in recommendations:
                derived_records.append({
                    "iteration": -1,
                    "concept": item,
                    "value": "recommendation",
                    "weight": sf.summarizer.weights.get(item, 0.0),
                    "uncertainity": sf.svm_uncertainity.get(item, -1.0)
                })

            result = {
                "config_run_id": run_id,
                "config_oracle_type": oracle,
                "config_summarizer_type": summarizer,
                "config_parse_type": str(parser),
                #"config_wordembeddings": emb_var,
                "config_feedbackstore": sf.feedbackstore.get_config(),
                "config_feedback_interpretation": {},
                "config_concept_recommendation": {},
                "dataset": topic.get_dataset(),
                "topic": topic.get_name(),
                "models": rs,
                "model_rougescores": {
                    "iteration": -1,
                    "ROUGE-1 R score": ub_scores[0],
                    "ROUGE-2 R score": ub_scores[1],
                    "ROUGE-SU* R score": ub_scores[2],
                    "accepted": [],
                    "accept_count": 0,
                    "rejected": [],
                    "reject_count": 0,
                    "summary": ub_summary
                },
                "result_summary": summary,
                "result_rougescores": sf.log_sir_info_data,
                "log_feedbacks": derived_records
            }

            log.debug(
                "records: %s, infos %s, diff: %s" %
                (len(sf.flight_recorder.records), len(sf.log_info_data),
                 len(sf.flight_recorder.records) - len(sf.log_info_data)))

            write_to_file(json.dumps(result), filename)
            log.info("Writing results to %s" % (filename))

            df = pd.DataFrame(derived_records)
            filename = path.join(self.scores_storage_path,
                                 "flightrecorder-%s.csv" % (run_id))
            log.info("saving flightrecorder to %s with run_id %s" %
                     (filename, run_id))
            df.to_csv(filename, encoding="UTF-8")

            weights_json = path.join(
                self.scores_storage_path,
                "weightshistory-%s-%s-%s-%s.json" %
                (topic.get_dataset(), topic.get_name(), summarizer, run_id))
            write_to_file(json.dumps(sf.new_debug_weights_history),
                          weights_json)
            # BUG FIX: the original logged the flightrecorder CSV path here
            # instead of the weights-history file actually written.
            log.info("Writing weights history to %s" % (weights_json))
            weights_hist = pd.DataFrame(sf.new_debug_weights_history)

            filename = path.join(self.scores_storage_path,
                                 "weightshistory-%s.csv" % (run_id))
            weights_hist.to_csv(filename, encoding="UTF-8")

            log.debug("----------------------------------------------")
            log.debug(summary)
            log.debug(sf.log_info_data[-1])
            log.debug("----------------------------------------------")
            if self.pickle_store is not None:
                # Pickle dictionary using protocol 0.
                print('Pickle in file %s' % self.pickle_store)
                self.pickle_write(sf, self.pickle_store, log)

            # Returns the JSON output string; previously stored for an
            # optional tmp dump (see commented write_to_file below).
            self.write_summarize_output_json(
                sf, confirmatory_summary, derived_records, log,
                recom_sentences, result, run_id, summarizer, summary,
                self.pickle_store)
            # write_to_file(json_content, path.normpath(path.expanduser(path.join(self.iobasedir, "tmp", "tmp.json"))))
        else:
            # Unknown summarizer: fail loudly. ValueError is still caught by
            # any caller that handled the original BaseException.
            raise ValueError("You should tell which summarizer to use")

        if sf is not None:
            write_details_file([sf.log_info_data],
                               path.join(self.iobasedir, "tmp", "tmp.csv"))
        self.tlog.debug("SingleTopicRunner finished")
Example #39
0
 def from_dict(old):
     """Build an argument dict from *old*, resolving referenced records.

     Replaces the serialized ``student``/``topic`` sub-dicts with the
     corresponding records looked up by id.

     :param old: mapping with ``student`` and ``topic`` entries, each a
         dict carrying at least an ``id`` key.
     :return: a new dict with the resolved objects.
     """
     # Work on a shallow copy: the original rebound `old` and mutated the
     # caller's dict in place, which is a surprising side effect.
     args = dict(old)
     args['student'] = Student.get(id=args['student']['id'])
     args['topic'] = Topic.get(id=args['topic']['id'])
     return args