def load_wwang_parts(self):
    """Load the word vectors and the pickled id-keyed tables.

    Calls ``load_word_vec`` and then fills ``qid2wids``, ``aid2wids``,
    ``aid2vote`` and ``aid2qu`` from the pickles named by the matching
    ``*_file`` attributes, printing a progress message after each group.

    Raises:
        AssertionError: if ``aid2vote``, ``aid2wids`` and ``aid2qu`` do not
            all have the same length.
    """
    self.load_word_vec()

    self.qid2wids = iu.load_pickle(self.qid2wids_file)
    self.aid2wids = iu.load_pickle(self.aid2wids_file)
    print('load awids/qwids over')

    self.aid2vote = iu.load_pickle(self.aid2vote_file)
    self.aid2qu = iu.load_pickle(self.aid2qu_file)
    # The three answer-keyed tables are expected to have the same size.
    assert len(self.aid2vote) == len(self.aid2wids) == len(self.aid2qu)
    # The message has no placeholder, so the former `.format(self.name)` call
    # never changed the output; it was dropped so that a missing `name`
    # attribute cannot fail the load after all the work is done.
    print('load qid/aid/uid over')
def get_qid2auv_bert(self):
    """Build ``self.qid2auv_bert`` by grouping (qid, aid, uid) records per qid.

    For every question id the result holds a 3-tuple of parallel lists: the
    ``aid2pf16`` entry of each answer, the ``uid2uint`` entry of its user,
    and the ``aid2vote`` entry of the answer, in the order the records
    appear in ``qau_list``.
    """
    records = iu.load_pickle(self.fill('_mid', 'qau_list.pkl'))
    user_to_int = iu.load_pickle(self.fill('_mid', 'uid2uint_dict.pkl'))
    answer_to_pf16 = iu.load_pickle(self.fill('bert', 'aid2pf16.pkl'))
    # NOTE(review): the other calls pass a directory-like first argument to
    # `fill`; this one passes only the file name -- confirm this is intended.
    answer_to_vote = iu.load_pickle(self.fill('aid2vote_dict.pkl'))
    print(len(records), len(user_to_int), len(answer_to_pf16), len(answer_to_vote))

    self.qid2auv_bert = dict()
    for qid, aid, uid in records:
        # First sighting of a qid creates its three empty parallel lists.
        answers, users, votes = self.qid2auv_bert.setdefault(qid, ([], [], []))
        answers.append(answer_to_pf16[aid])
        users.append(user_to_int[uid])
        votes.append(answer_to_vote[aid])
def load_clu_init(self, embed_dim: int, topic_ratio: float = 1) -> np.ndarray:
    """Return the object unpickled from the cluster-init file.

    The file path is resolved by ``get_clu_init_file(embed_dim, topic_ratio)``.
    """
    return iu.load_pickle(self.get_clu_init_file(embed_dim, topic_ratio))
def load_word2vec(self, embed_dim: int) -> Dict[str, np.ndarray]:
    """Return the object unpickled from the word2vec file.

    The file path is resolved by ``get_word2vec_file(embed_dim)``.
    """
    return iu.load_pickle(self.get_word2vec_file(embed_dim))
def partition_qids(aid2qu_file, seed, pidx):
    """Search for an assignment of question ids to the ``RVS`` splits.

    Each question gets an initial split drawn from ``RVS`` with probabilities
    0.78 / 0.11 / 0.11. For up to 10000 rounds the assignment is repaired
    (users with fewer than 3 questions in the first split get one question
    moved to ``R``) and then checked. It is accepted once every user has at
    least 3 questions in the first split and at least 1 in each of the other
    two.

    NOTE(review): the repair step assumes ``RVS`` is ordered ``(R, V, S)``;
    these names are defined outside this function -- confirm.

    Args:
        aid2qu_file: path of a pickle holding a mapping ``aid -> (qid, uid)``.
        seed: value passed to ``np.random.seed`` before any split is drawn.
        pidx: identifier printed alongside failed checks (logging only).

    Returns:
        A dict mapping each element of ``RVS`` to the list of question ids
        assigned to it, or ``None`` if no round produced a valid partition.
    """

    class User:
        # Registry of all users created during this call, keyed by uid.
        lookup = dict()

        def __init__(self, uid):
            self.uid = uid
            self.qs = list()
            self.rvs2qs = None

        def add_q(self, q):
            self.qs.append(q)

        def split_rvs_qs(self):
            """Regroup this user's questions by their current split."""
            self.rvs2qs = dict((x, list()) for x in RVS)
            for q in self.qs:
                self.rvs2qs[q.rvs].append(q)
            return self.rvs2qs

        @staticmethod
        def find(uid):
            if uid not in User.lookup:
                User.lookup[uid] = User(uid)
            return User.lookup[uid]

    class Ques:
        # Registry of all questions created during this call, keyed by qid.
        lookup = dict()

        def __init__(self, qid):
            self.qid = qid
            self.users = list()
            self.rvs = np.random.choice(RVS, p=[0.78, 0.11, 0.11])

        def add_user(self, user):
            # Previously called below but never defined, which raised
            # AttributeError on the very first record.
            self.users.append(user)

        def set_rvs(self, x):
            self.rvs = x

        @staticmethod
        def find(qid):
            if qid not in Ques.lookup:
                Ques.lookup[qid] = Ques(qid)
            return Ques.lookup[qid]

    # Seed before any Ques is built so the initial draws are reproducible.
    np.random.seed(seed)
    for aid, (qid, uid) in iu.load_pickle(aid2qu_file).items():
        ques = Ques.find(qid)
        user = User.find(uid)
        ques.add_user(user)
        user.add_q(ques)

    for i in range(10000):
        print(i, 'th trial')
        # Repair pass: top up the first split from V, or else from S.
        for uid, user in User.lookup.items():
            rvs_quess = user.split_rvs_qs()
            rc, vc, sc = [len(rvs_quess[x]) for x in RVS]
            if rc < 3:
                print('recheck on ', uid)
                # Use the freshly computed grouping; the old code read a
                # non-existent `user.rvs2quess` attribute here.
                if vc > 0:
                    rvs_quess[V][0].set_rvs(R)
                elif sc > 0:
                    rvs_quess[S][0].set_rvs(R)

        # Check pass: every user must meet the per-split minimums.
        can = True
        for user in User.lookup.values():
            rvs_quess = user.split_rvs_qs()
            counts = rc, vc, sc = [len(rvs_quess[x]) for x in RVS]
            if rc < 3 or vc < 1 or sc < 1:
                print(pidx, counts)
                can = False
                break

        if can:
            rvs2qid = dict((x, list()) for x in RVS)
            for qid, ques in Ques.lookup.items():
                rvs2qid[ques.rvs].append(qid)
            lenarr = [len(qids) for qids in rvs2qid.values()]
            total = sum(lenarr)
            # Guard the ratio print against an empty input (total == 0).
            print(lenarr, [s / total if total else 0.0 for s in lenarr])
            all_qids = list(Ques.lookup.keys())
            rvs_qids = au.merge(rvs2qid.values())
            print(len(all_qids), len(set(all_qids)), '; ', len(rvs_qids), len(set(rvs_qids)))
            # Sanity check: the splits cover every question exactly once.
            assert len(all_qids) == len(rvs_qids) and set(all_qids) == set(rvs_qids)
            print('valid partition found')
            return rvs2qid

    # No round satisfied the per-user constraints.
    return None
def load_cdong_full(self):
    """Load word vectors, user vectors, and the two pickled id tables.

    Sets ``rvs2qids`` and ``qid2qauv`` from ``rvs2qids_file`` and
    ``qid2qauv_file`` after the two vector loaders have run.
    """
    # Vector tables first, in the same order as before.
    self.load_word_vec()
    self.load_user_vec()

    rvs2qids_path = self.rvs2qids_file
    self.rvs2qids = iu.load_pickle(rvs2qids_path)

    qid2qauv_path = self.qid2qauv_file
    self.qid2qauv = iu.load_pickle(qid2qauv_path)
def load_bert_full(self):
    """Load the BERT user vectors, build ``qid2auv_bert``, and load ``rvs2qids``."""
    self.load_user_vec_bert()
    self.get_qid2auv_bert()

    rvs2qids_path = self.rvs2qids_file
    self.rvs2qids = iu.load_pickle(rvs2qids_path)
def load_user_vec_bert(self):
    """Set ``self.user_vec_bert`` from the pickle at ``fill('bert', 'user_vec_bert.pkl')``."""
    path = self.fill('bert', 'user_vec_bert.pkl')
    self.user_vec_bert = iu.load_pickle(path)
def load_user_vec(self):
    """Set ``self.user_vec`` from the pickle named by ``self.user_vec_file``."""
    path = self.user_vec_file
    self.user_vec = iu.load_pickle(path)
def load_word_vec(self):
    """Set ``self.word_vec`` from the pickle named by ``self.word_vec_file``."""
    path = self.word_vec_file
    self.word_vec = iu.load_pickle(path)