def calc_user_factor(conn_func):
    """Compute, for every user, the most likely factor level of each tag.

    Records are fetched with ``get_record_factor`` for every key of
    ``CONF_MAP``; missing or empty results are skipped.  The remaining
    records are concatenated, grouped by ``uid`` and each group is reduced
    to a single string by a Bayesian update (see ``_calc_bayes``).

    Args:
        conn_func: Passed through unchanged to ``get_record_factor``.

    Returns:
        pandas.DataFrame with the columns ``uid`` and ``score``.  ``score``
        is a ``|``-joined list of ``tag:level`` pairs.  When there is
        nothing to process the frame is empty but still has both columns.
    """
    records = []
    for parent_kind in CONF_MAP.keys():
        record = get_record_factor(conn_func, parent_kind)
        if record is None or record.empty:
            continue
        records.append(record)

    if not records:
        # pd.concat() raises ValueError on an empty list, so hand back the
        # empty result directly when no kind produced any record.
        return pd.DataFrame(columns=['uid', 'score'])
    records = pd.concat(records)

    def _calc_bayes(df):
        """Reduce one user's rows to a ``tag:level|tag:level`` string.

        Every row must unpack into ``(uid, status, score)`` where ``score``
        is a ``|``-joined list of ``tag:d`` integer pairs.
        """
        ufac = {}
        for _, row in df.iterrows():
            _uid, status, score = row
            correct = status - 1  # 1/2 -> 0/1, i.e. answered wrong/right
            tags = [item.split(':') for item in score.split('|')]
            tags = [(int(tag), int(d)) for tag, d in tags]
            for tag, d in tags:
                if tag not in ufac:
                    # Each tag starts from its own copy of the shared prior.
                    ufac[tag] = population_pri.copy()
                # Column d-1 of ``priori`` (d is 1-based); presumably the
                # probability of a correct answer per level -- TODO confirm.
                px = np.array(priori[:, d - 1]).T
                # Multiply by px for a right answer, by (1 - px) for a wrong one.
                ufac[tag] = ufac[tag] * (correct * px + (1 - correct) * (1 - px))
        # Report the 1-based position of the largest entry for each tag.
        return "|".join(
            str(tag) + ':' + str(fac.argmax() + 1)
            for tag, fac in ufac.items()
        )

    user_factor = records.groupby(['uid']).apply(_calc_bayes).reset_index()
    if len(user_factor) == 0:
        # A bare pd.DataFrame() has no columns, so naming two columns on it
        # fails; build the empty frame with the columns already in place.
        return pd.DataFrame(columns=['uid', 'score'])
    user_factor.columns = ['uid', 'score']
    return user_factor
def calc_user_factor(conn_func):
    """Compute, for every user, the most likely factor level of each tag.

    Records are fetched with ``get_record_factor`` for every key of
    ``CONF_MAP``; missing or empty results are skipped.  The remaining
    records are concatenated, grouped by ``uid`` and each group is reduced
    to a single string by a Bayesian update (see ``_calc_bayes``).

    Args:
        conn_func: Passed through unchanged to ``get_record_factor``.

    Returns:
        pandas.DataFrame with the columns ``uid`` and ``score``.  ``score``
        is a ``|``-joined list of ``tag:level`` pairs.  When there is
        nothing to process the frame is empty but still has both columns.
    """
    records = []
    for parent_kind in CONF_MAP.keys():
        record = get_record_factor(conn_func, parent_kind)
        if record is None or record.empty:
            continue
        records.append(record)

    if not records:
        # pd.concat() raises ValueError on an empty list, so hand back the
        # empty result directly when no kind produced any record.
        return pd.DataFrame(columns=['uid', 'score'])
    records = pd.concat(records)

    def _calc_bayes(df):
        """Reduce one user's rows to a ``tag:level|tag:level`` string.

        Every row must unpack into ``(uid, status, score)`` where ``score``
        is a ``|``-joined list of ``tag:d`` integer pairs.
        """
        ufac = {}
        for _, row in df.iterrows():
            _uid, status, score = row
            correct = status - 1  # 1/2 -> 0/1, i.e. answered wrong/right
            tags = [item.split(':') for item in score.split('|')]
            tags = [(int(tag), int(d)) for tag, d in tags]
            for tag, d in tags:
                if tag not in ufac:
                    # Each tag starts from its own copy of the shared prior.
                    ufac[tag] = population_pri.copy()
                # Column d-1 of ``priori`` (d is 1-based); presumably the
                # probability of a correct answer per level -- TODO confirm.
                px = np.array(priori[:, d - 1]).T
                # Multiply by px for a right answer, by (1 - px) for a wrong one.
                ufac[tag] = ufac[tag] * (correct * px + (1 - correct) * (1 - px))
        # Report the 1-based position of the largest entry for each tag.
        return "|".join(
            str(tag) + ':' + str(fac.argmax() + 1)
            for tag, fac in ufac.items()
        )

    user_factor = records.groupby(['uid']).apply(_calc_bayes).reset_index()
    if len(user_factor) == 0:
        # A bare pd.DataFrame() has no columns, so naming two columns on it
        # fails; build the empty frame with the columns already in place.
        return pd.DataFrame(columns=['uid', 'score'])
    user_factor.columns = ['uid', 'score']
    return user_factor
def remove_unapproved_questions(conn_func, redis_db):
    """Drop hash fields of questions that are not in the approved state.

    The fields of the ``ENGLISH_QUESTION_FACTOR_KEY`` hash are read once
    and bucketed by question type.  For every entry of ``CONF_MAP`` the
    ids of the rows whose state equals ``K_APPROVED`` are queried, and
    every field of that question type whose id is not among them is
    removed from the hash.  The number of removed fields is printed.

    Args:
        conn_func: Zero-argument callable returning a connection object
            that provides ``cursor()`` and ``close()``.
        redis_db: Client object providing ``hkeys`` and ``hdel``.
    """
    # Fields are "<qtype>:<qid>"; group the raw field names by question type.
    keys_map = {}
    for field in redis_db.hkeys(ENGLISH_QUESTION_FACTOR_KEY):
        qtype, _ = field.split(':')
        keys_map.setdefault(int(qtype), []).append(field)

    count = 0
    conn = conn_func()
    try:
        cursor = conn.cursor()
        for qtype, conf in CONF_MAP.items():
            context_conf = conf['context_conf']
            table = context_conf['table']
            cid = context_conf['cid']
            # Column and table names cannot be bound as query parameters,
            # so the statement is formatted from the CONF_MAP values.
            sql = 'select %s from %s where question_type=%s and state=%s'
            cursor.execute(sql % (cid, table, qtype, K_APPROVED))
            approved = set(qid for qid, in cursor.fetchall())
            for key in keys_map.get(qtype, []):
                if int(key.split(':')[1]) not in approved:
                    redis_db.hdel(ENGLISH_QUESTION_FACTOR_KEY, key)
                    count += 1
    finally:
        # Release the connection even when a query or a Redis call fails.
        conn.close()
    # Parenthesised single argument: same output under Python 2 and 3.
    print('remove unapproved english questions: %d' % count)
def remove_unapproved_questions(conn_func, banker_conn_func, redis_db):
    """Delete Redis keys of questions that are not in the approved state.

    For every entry of ``CONF_MAP`` the ids of the rows whose state equals
    ``K_APPROVED`` are queried through the banker connection.  All Redis
    keys matching ``ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')`` are then
    listed, and each key whose trailing ``/``-separated segment is an id
    outside that set is deleted.  The number of deletions is printed.

    Args:
        conn_func: Zero-argument callable returning a connection object
            that provides ``close()``.
        banker_conn_func: Zero-argument callable returning the connection
            (``cursor()`` / ``close()``) the approval query runs on.
        redis_db: Client object providing ``keys`` and ``delete``.
    """
    count = 0
    # NOTE(review): nothing is read through this connection here; it is
    # still opened and closed as before in case conn_func() has effects
    # callers rely on -- confirm and drop if not.
    conn = conn_func()
    try:
        banker_conn = banker_conn_func()
        try:
            banker_cursor = banker_conn.cursor()
            for qtype, conf in CONF_MAP.items():
                context_conf = conf['context_conf']
                table = context_conf['table']
                cid = context_conf['cid']
                # Column and table names cannot be bound as query
                # parameters, so the statement is formatted from CONF_MAP.
                sql = 'select %s from %s where question_type=%s and state=%s'
                banker_cursor.execute(sql % (cid, table, qtype, K_APPROVED))
                approved = set(qid for qid, in banker_cursor.fetchall())
                pattern = ENGLISH_QUESTION_FACTOR_KEY % (qtype, '*')
                for key in redis_db.keys(pattern):
                    # The question id is the last "/"-separated segment.
                    qid = int(key.split('/')[-1])
                    if qid not in approved:
                        redis_db.delete(ENGLISH_QUESTION_FACTOR_KEY % (qtype, qid))
                        count += 1
        finally:
            # Release the banker connection even when a query or a Redis
            # call fails.
            banker_conn.close()
    finally:
        # Also reached when banker_conn_func() itself raises.
        conn.close()
    # Parenthesised single argument: same output under Python 2 and 3.
    print('remove unapproved questions: %d' % count)