Beispiel #1
0
    def dump_question_factor(self, QTYPE, qinfo, solution, analysis_info,
                             knowledge_tree):
        """Build, store and index the factor of every question of one type.

        ``qinfo`` is merged with the rows of ``solution`` and of
        ``analysis_info`` whose ``qtype`` equals ``QTYPE`` (``pd.merge`` with
        no explicit keys, i.e. on the columns the frames share).  The merged
        frame must provide the columns ``qtype``, ``qid``, ``difficulty``,
        ``sub_qid``, ``solution_id``, ``analysis_id``, ``ktag`` and ``mtag``.

        For each ``(qtype, qid)`` group a ``SciQuestionFactor`` is created
        from the group's first ``difficulty`` value and fed one list of
        analysis steps per ``(sub_qid, solution_id)`` subgroup.  Its dumped
        value is written through ``flush_redis`` under
        ``self.QUESTION_FACTOR_KEY % (qtype, qid)``.

        Args:
            QTYPE: value compared against the ``qtype`` column of
                ``solution`` and ``analysis_info``.
            qinfo: DataFrame used as the left side of the first merge.
            solution: DataFrame with a ``qtype`` column.
            analysis_info: DataFrame with a ``qtype`` column.
            knowledge_tree: passed unchanged to
                ``self.get_question_knowledge_index``.

        Returns:
            A list of ``(qtype, qid, indices)`` tuples, one per question,
            where ``indices`` is the return value of
            ``self.get_question_knowledge_index``.
        """
        def parse_tags(cell):
            # A null cell means "no tags"; otherwise the cell holds integer
            # ids separated by '|'.
            if pd.isnull(cell):
                return None
            return set(map(int, cell.split('|')))

        res = []
        df = pd.merge(qinfo, solution[solution.qtype == QTYPE])
        df = pd.merge(df, analysis_info[analysis_info.qtype == QTYPE])
        for (qtype, qid), group1 in df.groupby(['qtype', 'qid']):
            difficulty = group1.difficulty.iloc[0]
            question_analysis = []
            for _, group2 in group1.groupby(['sub_qid', 'solution_id']):
                # DataFrame.sort() was removed in pandas 0.20; sort_values()
                # is its replacement (available since 0.17).  Keep the old
                # call as a fallback so older installations still work.
                if hasattr(group2, 'sort_values'):
                    ordered = group2.sort_values(by='analysis_id')
                else:
                    ordered = group2.sort(columns='analysis_id')
                analysis_step = []
                for _, row in ordered[['ktag', 'mtag']].iterrows():
                    ktag, mtag = row
                    analysis_step.append((parse_tags(ktag), parse_tags(mtag)))
                question_analysis.append(analysis_step)

            qfactor = SciQuestionFactor(difficulty)
            qfactor.add_factor(question_analysis)
            factor_value = qfactor.dump_factor()
            factor_key = self.QUESTION_FACTOR_KEY % (qtype, qid)
            flush_redis(self.redis_db, factor_key, factor_value)

            indices = self.get_question_knowledge_index(
                qfactor, knowledge_tree)
            res.append((qtype, qid, indices))
        return res
Beispiel #2
0
    def dump_question_factor(self, QTYPE, qinfo, solution,
                             analysis_info, knowledge_tree):
        """Compute, persist and index question factors for one question type.

        The rows of ``solution`` and ``analysis_info`` whose ``qtype`` equals
        ``QTYPE`` are merged into ``qinfo`` (``pd.merge`` without explicit
        keys).  The result is grouped by ``(qtype, qid)``; inside each
        question, rows are grouped by ``(sub_qid, solution_id)``, ordered by
        ``analysis_id`` and turned into a list of ``(ktag, mtag)`` steps.

        Each question's steps are loaded into a ``SciQuestionFactor`` built
        from the first ``difficulty`` value of the group; the dumped factor is
        written via ``flush_redis`` under
        ``self.QUESTION_FACTOR_KEY % (qtype, qid)``.

        Args:
            QTYPE: value matched against the ``qtype`` column.
            qinfo: DataFrame merged with the filtered frames.
            solution: DataFrame with a ``qtype`` column.
            analysis_info: DataFrame with a ``qtype`` column.
            knowledge_tree: forwarded to
                ``self.get_question_knowledge_index``.

        Returns:
            List of ``(qtype, qid, indices)`` tuples; ``indices`` is what
            ``self.get_question_knowledge_index`` returned for the question.
        """
        def split_ids(cell):
            # Null cell -> None; otherwise a set of the '|'-separated ints.
            return None if pd.isnull(cell) else set(map(int, cell.split('|')))

        def by_analysis_id(frame):
            # DataFrame.sort() no longer exists from pandas 0.20 on; prefer
            # sort_values() and fall back to sort() only on old pandas.
            sort_values = getattr(frame, 'sort_values', None)
            if sort_values is None:
                return frame.sort(columns='analysis_id')
            return sort_values(by='analysis_id')

        res = []
        df = pd.merge(qinfo, solution[solution.qtype == QTYPE])
        df = pd.merge(df, analysis_info[analysis_info.qtype == QTYPE])
        df_group = df.groupby(['qtype', 'qid'])
        for name1, group1 in df_group:
            qtype, qid = name1
            difficulty = group1.difficulty.iloc[0]
            question_analysis = []
            for _, group2 in group1.groupby(['sub_qid', 'solution_id']):
                S = by_analysis_id(group2)[['ktag', 'mtag']]
                analysis_step = []
                for _, row in S.iterrows():
                    ktag, mtag = row
                    analysis_step.append((split_ids(ktag), split_ids(mtag)))
                question_analysis.append(analysis_step)

            qfactor = SciQuestionFactor(difficulty)
            qfactor.add_factor(question_analysis)
            factor_value = qfactor.dump_factor()
            factor_key = self.QUESTION_FACTOR_KEY % (qtype, qid)
            flush_redis(self.redis_db, factor_key, factor_value)

            indices = self.get_question_knowledge_index(qfactor, knowledge_tree)
            res.append((qtype, qid, indices))
        return res
Beispiel #3
0
def dump_question_inv_index(conn_func, redis_db, qinfo):
    """Write the (exam_type, qtype) -> question-id inverted index to redis.

    The fields of the ``CET_QUESTION_FEATURE_KEY`` hash are parsed as
    ``qtype:qid:<third int>``.  Questions that are not present in ``qinfo``
    are skipped and counted; questions whose exam type (looked up in the
    mapping returned by ``get_origin_info``) is ``EXAM_TYPE_DEFAULT`` are
    skipped silently.  The remaining ids are grouped and flushed under
    ``CET_QUESTION_INV_INDEX_KEY % (exam_type, qtype)``.

    Args:
        conn_func: passed to ``get_origin_info``.
        redis_db: redis client used for ``hkeys`` and by ``flush_redis``.
        qinfo: object with ``iteritems()`` whose keys are 3-item sequences
            starting with ``(qtype, qid)``.
    """
    known_questions = set()
    for info_key, _ in qinfo.iteritems():
        known_qtype, known_qid, _ = info_key
        known_questions.add((known_qtype, known_qid))

    origin = get_origin_info(conn_func)
    inv_index = {}
    skipped = set()
    for field in redis_db.hkeys(CET_QUESTION_FEATURE_KEY):
        qtype, qid, _ = map(int, field.split(':'))
        question = (qtype, qid)
        # Stale entries can linger in redis for all sorts of obscure reasons;
        # filtering against the qinfo computed in this run guarantees that
        # only questions already stored in the database are indexed.
        # NOTE(review): skipped entries are only counted here, no delete is
        # issued in this function.
        if question not in known_questions:
            skipped.add(question)
            continue
        exam_type = origin.get(qtype, {}).get(qid, EXAM_TYPE_DEFAULT)
        if exam_type == EXAM_TYPE_DEFAULT:
            continue
        per_qtype = inv_index.setdefault(exam_type, {})
        per_qtype.setdefault(qtype, []).append(qid)

    for exam_type, per_qtype in inv_index.iteritems():
        for qtype, qids in per_qtype.iteritems():
            index_key = CET_QUESTION_INV_INDEX_KEY % (exam_type, qtype)
            flush_redis(redis_db, index_key, qids)

    print('remove %s questions' % len(skipped))
Beispiel #4
0
def dump_question_index(redis_db, question_index):
    """Group question ids by (exam_kind, qtype) and flush each group to redis.

    Args:
        redis_db: redis client handed to ``flush_redis``.
        question_index: iterable of ``(exam_kind, qtype, qid)`` triples.
    """
    grouped = {}
    for exam_kind, qtype, qid in question_index:
        bucket = (exam_kind, qtype)
        if bucket not in grouped:
            grouped[bucket] = []
        grouped[bucket].append(qid)
    for bucket, qids in grouped.iteritems():
        # bucket is the (exam_kind, qtype) pair the key template expects.
        flush_redis(redis_db, ENGLISH_QUESTION_INV_INDEX % bucket, qids)
Beispiel #5
0
def dump_question_index(redis_db, question_index):
    """Store one list of question ids per (exam_kind, qtype) pair.

    Every ``(exam_kind, qtype, qid)`` triple of ``question_index`` adds
    ``qid`` to the list of its pair; each list is then written through
    ``flush_redis`` under ``ENGLISH_QUESTION_INV_INDEX % (exam_kind, qtype)``.
    """
    qids_by_pair = {}
    for exam_kind, qtype, qid in question_index:
        pair = (exam_kind, qtype)
        qids_by_pair.setdefault(pair, []).append(qid)
    for pair, qids in qids_by_pair.iteritems():
        exam_kind, qtype = pair
        redis_key = ENGLISH_QUESTION_INV_INDEX % (exam_kind, qtype)
        flush_redis(redis_db, redis_key, qids)
Beispiel #6
0
    def dump_question_factor(self, SUB_QTYPE, qinfo,
                             knowledge_info, knowledge_tree):
        """Build, store and index the factor of every question of a sub type.

        ``qinfo`` is merged (``pd.merge`` without explicit keys) with the rows
        of ``knowledge_info`` whose ``sub_type`` equals ``SUB_QTYPE``.  For
        each ``(qtype, qid)`` group the ``ktag`` and ``rktag`` cells of all
        rows are unioned into two sets, which together with the group's first
        ``difficulty`` value are loaded into an ``ArtQuestionFactor``.  The
        dumped factor is written via ``flush_redis`` under
        ``self.QUESTION_FACTOR_KEY % (qtype, qid)``.

        Args:
            SUB_QTYPE: value compared against ``knowledge_info.sub_type``.
            qinfo: DataFrame used as the left side of the merge.
            knowledge_info: DataFrame with a ``sub_type`` column.
            knowledge_tree: forwarded to
                ``self.get_question_knowledge_index``.

        Returns:
            List of ``(qtype, qid, indices)`` tuples; ``indices`` is the
            return value of ``self.get_question_knowledge_index``.
        """
        def to_id_set(cell):
            # Null cell -> None; otherwise the '|'-separated ints as a set.
            if pd.isnull(cell):
                return None
            return set(map(int, cell.split('|')))

        results = []
        merged = pd.merge(
            qinfo, knowledge_info[knowledge_info.sub_type == SUB_QTYPE])
        for (qtype, qid), rows in merged.groupby(['qtype', 'qid']):
            difficulty = rows.difficulty.iloc[0]
            ktag, rktag = set(), set()
            for _, row in rows[['ktag', 'rktag']].iterrows():
                ktag_set, rktag_set = map(to_id_set, row)
                # None and empty sets contribute nothing.
                if ktag_set:
                    ktag.update(ktag_set)
                if rktag_set:
                    rktag.update(rktag_set)

            qfactor = ArtQuestionFactor()
            qfactor.add_factor((ktag, rktag, difficulty))
            flush_redis(self.redis_db,
                        self.QUESTION_FACTOR_KEY % (qtype, qid),
                        qfactor.dump_factor())

            indices = self.get_question_knowledge_index(qfactor, knowledge_tree)
            results.append((qtype, qid, indices))
        return results
Beispiel #7
0
 def dump_user_factor(self):
     """Store the converted factor of every user under its adaptive key.

     For each uid of ``self.get_user_set()`` the value at
     ``self.USER_FACTOR_KEY % uid`` is run through ``convert_user_factor``;
     a non-None result is flushed to ``self.ADAPTIVE_USER_FACTOR_KEY % uid``.
     Users whose conversion yields None are skipped.
     """
     for uid in self.get_user_set():
         source_key = self.USER_FACTOR_KEY % uid
         converted = convert_user_factor(self.redis_db, source_key)
         if converted is not None:
             target_key = self.ADAPTIVE_USER_FACTOR_KEY % uid
             flush_redis(self.redis_db, target_key, converted)
Beispiel #8
0
 def dump_user_factor(self):
     """Copy each user's factor, after conversion, to the adaptive key.

     Iterates ``self.get_user_set()``; ``convert_user_factor`` is called with
     ``self.redis_db`` and ``self.USER_FACTOR_KEY % uid``.  A None result
     means there is nothing to write for that user.
     """
     redis_db = self.redis_db
     for uid in self.get_user_set():
         ufac = convert_user_factor(redis_db, self.USER_FACTOR_KEY % uid)
         if ufac is None:
             # Nothing convertible for this user.
             continue
         flush_redis(redis_db, self.ADAPTIVE_USER_FACTOR_KEY % uid, ufac)
Beispiel #9
0
 def dump_concept_graph(self):
     """Write the concept graph and its transposed form to redis.

     ``self.knowledge_mat``, ``self.method_mat`` and
     ``self.knowledge_method_mat`` are JSON-encoded and flushed, in that
     order, under ``self.CONCEPT_GRAPH``.  ``transpose_matrix`` is then
     applied to each of the three JSON strings and the results are flushed
     under ``self.CONCEPT_GRAPH_TRANSPOSE``.
     """
     k_str = json.dumps(self.knowledge_mat)
     m_str = json.dumps(self.method_mat)
     k_m_str = json.dumps(self.knowledge_method_mat)
     value = [k_str, m_str, k_m_str]
     flush_redis(self.redis_db, self.CONCEPT_GRAPH, value)
     # Build a real list: map() is lazy on Python 3, so flush_redis would
     # otherwise receive a one-shot iterator instead of a sequence.
     # NOTE(review): transpose_matrix receives the JSON strings, not the
     # matrices themselves; confirm that is what it expects.
     transposed = [transpose_matrix(item) for item in value]
     flush_redis(self.redis_db, self.CONCEPT_GRAPH_TRANSPOSE, transposed)
Beispiel #10
0
    def dump_card_inv_index(self):
        """Compute the index from concept_graph elements (vertices/edges) to cards.

        Each key of ``self.get_all_card_factor_keys()`` is turned into an item
        id ``"<card_type>:<card_id>"`` from its last two ``/``-separated
        parts.  The card factor loaded for the key is registered through
        ``append_item_id``, and every second-level entry of the collected
        mapping is flushed under ``self.CONCEPT_CARD_INDEX % (k1, k2)``.
        """
        inv_index = {}
        for factor_key in self.get_all_card_factor_keys():
            card_type, card_id = factor_key.split("/")[-2:]
            item_id = "%s:%s" % (card_type, card_id)
            card_mat = load_card_factor(self.redis_db, factor_key)
            append_item_id(item_id, card_mat, inv_index, "k", "k")

        for k1, inner in inv_index.iteritems():
            for k2, item_ids in inner.iteritems():
                flush_redis(self.redis_db,
                            self.CONCEPT_CARD_INDEX % (k1, k2),
                            item_ids)
Beispiel #11
0
    def dump_card_inv_index(self):
        '''Compute the index from concept_graph elements (vertices/edges) to cards.

        The last two '/'-separated parts of every card factor key give the
        card type and card id.  ``append_item_id`` collects the item ids into
        a two-level mapping, whose entries are written via ``flush_redis``
        under ``self.CONCEPT_CARD_INDEX % (k1, k2)``.
        '''
        collected = {}
        card_keys = self.get_all_card_factor_keys()
        for card_key in card_keys:
            parts = card_key.split('/')[-2:]
            card_type, card_id = parts
            item_id = '%s:%s' % (card_type, card_id)
            k_mat = load_card_factor(self.redis_db, card_key)
            append_item_id(item_id, k_mat, collected, 'k', 'k')

        for k1, by_k2 in collected.iteritems():
            for k2, item_ids in by_k2.iteritems():
                index_key = self.CONCEPT_CARD_INDEX % (k1, k2)
                flush_redis(self.redis_db, index_key, item_ids)
Beispiel #12
0
    def dump_question_inv_index(self):
        """Compute the index from concept_graph elements (vertices/edges) to questions.

        Each key of ``self.get_all_question_factor_keys()`` yields an item id
        ``"<qtype>:<qid>"`` from its last two ``/``-separated parts.  The
        loaded question factor is unpacked into four values; the first three
        are registered through ``append_item_id`` with the tag pairs
        ("k", "k"), ("m", "m") and ("k", "m"), the fourth is not used here.
        Every second-level entry of the collected mapping is flushed under
        ``self.CONCEPT_QUESTION_INDEX % (k1, k2)``.
        """
        inv_index = {}
        for factor_key in self.get_all_question_factor_keys():
            qtype, qid = factor_key.split("/")[-2:]
            item_id = "%s:%s" % (qtype, qid)
            k_mat, m_mat, k_m_mat, _diff = load_question_factor(
                self.redis_db, factor_key)
            tagged_matrices = (
                (k_mat, "k", "k"),
                (m_mat, "m", "m"),
                (k_m_mat, "k", "m"),
            )
            for matrix, first_tag, second_tag in tagged_matrices:
                append_item_id(item_id, matrix, inv_index,
                               first_tag, second_tag)

        for k1, inner in inv_index.iteritems():
            for k2, item_ids in inner.iteritems():
                flush_redis(self.redis_db,
                            self.CONCEPT_QUESTION_INDEX % (k1, k2),
                            item_ids)
Beispiel #13
0
    def dump_question_inv_index(self):
        '''Compute the index from concept_graph elements (vertices/edges) to questions.

        For every question factor key, the item id '<qtype>:<qid>' is built
        from the key's last two '/'-separated parts and registered, via
        ``append_item_id``, for each of the first three values of the loaded
        factor.  The collected two-level mapping is then written entry by
        entry under ``self.CONCEPT_QUESTION_INDEX % (k1, k2)``.
        '''
        collected = {}
        question_keys = self.get_all_question_factor_keys()
        for question_key in question_keys:
            parts = question_key.split('/')[-2:]
            qtype, qid = parts
            item_id = '%s:%s' % (qtype, qid)
            loaded = load_question_factor(self.redis_db, question_key)
            # The fourth value of the factor is not needed for the index.
            k_mat, m_mat, k_m_mat, _unused = loaded
            append_item_id(item_id, k_mat, collected, 'k', 'k')
            append_item_id(item_id, m_mat, collected, 'm', 'm')
            append_item_id(item_id, k_m_mat, collected, 'k', 'm')

        for k1, by_k2 in collected.iteritems():
            for k2, item_ids in by_k2.iteritems():
                index_key = self.CONCEPT_QUESTION_INDEX % (k1, k2)
                flush_redis(self.redis_db, index_key, item_ids)
Beispiel #14
0
def dump_result(result, redis_db):
    """Print the accuracy and persist the contents of ``result`` to redis.

    Args:
        result: 5-item sequence ``(user_fac, item_fac, w, w0, accuracy)``.
            ``user_fac`` and ``item_fac`` expose ``iteritems()`` yielding
            ``(id, factor)`` pairs; factors and ``w`` are converted with
            ``list()`` before being flushed.
        redis_db: redis client used by ``flush_redis`` and for the final
            ``set`` of the bias weight ``w0``.
    """
    user_fac, item_fac, w, w0, accuracy = result
    print('accuracy: %.4f' % accuracy)

    def store_as_list(redis_key, values):
        # Every vector is materialised as a plain list before storing.
        flush_redis(redis_db, redis_key, list(values))

    for uid, fac in user_fac.iteritems():
        store_as_list(CET_MOCK_TEST_USER_FACTOR_KEY % uid, fac)
    for qid, fac in item_fac.iteritems():
        store_as_list(CET_MOCK_TEST_QUESTION_FACTOR_KEY % qid, fac)
    store_as_list(CET_MOCK_TEST_MODEL_WEIGHT_KEY, w)
    redis_db.set(CET_MOCK_TEST_MODEL_BIAS_WEIGHT_KEY, w0)
Beispiel #15
0
def dump_result(result, redis_db):
    """Report the accuracy and write factors and weights to redis.

    ``result`` unpacks into ``(user_fac, item_fac, w, w0, accuracy)``.  Each
    user and question factor is flushed as a list under its per-id key, the
    weight vector ``w`` is flushed as a list under
    ``CET_MOCK_TEST_MODEL_WEIGHT_KEY`` and ``w0`` is stored with
    ``redis_db.set`` under ``CET_MOCK_TEST_MODEL_BIAS_WEIGHT_KEY``.
    """
    user_factors, question_factors, weights, bias, accuracy = result
    print('accuracy: %.4f' % accuracy)

    for uid, factor in user_factors.iteritems():
        user_key = CET_MOCK_TEST_USER_FACTOR_KEY % uid
        flush_redis(redis_db, user_key, list(factor))

    for qid, factor in question_factors.iteritems():
        question_key = CET_MOCK_TEST_QUESTION_FACTOR_KEY % qid
        flush_redis(redis_db, question_key, list(factor))

    weight_list = list(weights)
    flush_redis(redis_db, CET_MOCK_TEST_MODEL_WEIGHT_KEY, weight_list)
    redis_db.set(CET_MOCK_TEST_MODEL_BIAS_WEIGHT_KEY, bias)