def most_similar_catsent(self, svec, cvec, num, ident_cat=False):
    """Return the highest-scoring (sentence, category) pairs, best first.

    A score is computed for each of the ``self.pair_len`` pairs listed in
    ``self.sent_cat_pair``; the pairs are then visited in descending score
    order and collected until ``num`` results have been gathered.

    Args:
        svec: Passed unchanged to ``catsentvec_sim_sum`` (presumably the
            sentence vector -- confirm against that helper).
        cvec: Passed unchanged to ``catsentvec_sim_sum`` (presumably the
            category vector -- confirm against that helper).
        num: Collection stops as soon as exactly ``num`` results have been
            gathered. If that count is never reached (fewer qualifying
            pairs than ``num``, or ``num <= 0``) every qualifying pair is
            returned.
        ident_cat: When true, each category id appears at most once in the
            result (only its best-scoring pair is kept). When false, every
            pair qualifies.

    Returns:
        A list of ``(sent_id, similarity, cat_id)`` tuples, where
        ``similarity`` is a plain Python ``float``.
    """
    # NOTE(review): a second definition of most_similar_catsent with the same
    # logic follows this one in the file; if both live in the same scope the
    # later one wins -- consider removing one of them.
    sims = zeros(self.pair_len, dtype=REAL)
    # `sims` is handed over as the output buffer; the helper's definition is
    # not shown here, so it is presumed to fill one score per pair in place.
    catsentvec_sim_sum(self, svec, cvec, sims)

    nearest = []
    seen_cats = set()
    # argsort is ascending, so reverse it to walk from best to worst score.
    for top_cand in argsort(sims)[::-1]:
        sent_no, cat_no = self.sent_cat_pair[top_cand]
        sent_id = self.sent_id_list[sent_no]
        cat_id = self.cat_id_list[cat_no]
        if ident_cat and cat_id in seen_cats:
            # This category already contributed its best-scoring pair.
            continue
        seen_cats.add(cat_id)
        nearest.append((sent_id, float(sims[top_cand]), cat_id))
        if len(nearest) == num:
            break
    return nearest
def most_similar_catsent(self, svec, cvec, num, ident_cat=False):
    """Return the highest-scoring (sentence, category) pairs, best first.

    A score is computed for each of the ``self.pair_len`` pairs listed in
    ``self.sent_cat_pair``; the pairs are then visited in descending score
    order and collected until ``num`` results have been gathered.

    Args:
        svec: Passed unchanged to ``catsentvec_sim_sum`` (presumably the
            sentence vector -- confirm against that helper).
        cvec: Passed unchanged to ``catsentvec_sim_sum`` (presumably the
            category vector -- confirm against that helper).
        num: Collection stops as soon as exactly ``num`` results have been
            gathered. If that count is never reached (fewer qualifying
            pairs than ``num``, or ``num <= 0``) every qualifying pair is
            returned.
        ident_cat: When true, each category id appears at most once in the
            result (only its best-scoring pair is kept). When false, every
            pair qualifies.

    Returns:
        A list of ``(sent_id, similarity, cat_id)`` tuples, where
        ``similarity`` is a plain Python ``float``.
    """
    # NOTE(review): an earlier definition of most_similar_catsent with the
    # same logic precedes this one in the file; if both live in the same
    # scope this later one is the one that takes effect -- consider removing
    # the duplicate.
    sims = zeros(self.pair_len, dtype=REAL)
    # `sims` is handed over as the output buffer; the helper's definition is
    # not shown here, so it is presumed to fill one score per pair in place.
    catsentvec_sim_sum(self, svec, cvec, sims)

    nearest = []
    seen_cats = set()
    # argsort is ascending, so reverse it to walk from best to worst score.
    for top_cand in argsort(sims)[::-1]:
        sent_no, cat_no = self.sent_cat_pair[top_cand]
        sent_id = self.sent_id_list[sent_no]
        cat_id = self.cat_id_list[cat_no]
        if ident_cat and cat_id in seen_cats:
            # This category already contributed its best-scoring pair.
            continue
        seen_cats.add(cat_id)
        nearest.append((sent_id, float(sims[top_cand]), cat_id))
        if len(nearest) == num:
            break
    return nearest
def worker_infer():
    """Worker loop: score each queued sentence with both models and report hits.

    Jobs are pulled from ``jobs`` until a ``None`` job arrives. Each job is
    an iterable of sentence tuples; element 0 is handed to
    ``train_single_sent_id`` and element 2 is the gold category id. For every
    sentence the per-pair scores of ``model1`` and ``model2`` are added
    together, and the ``topK`` best distinct category ids are collected.

    Side effects per sentence: prints the predicted ids and the gold id, and
    increments ``confusion_mtx[gold][best_prediction]``. Per job: puts the
    number of sentences whose gold id was among the predictions (as a float)
    on ``qout``.
    """
    while True:
        job = jobs.get()
        if job is None:
            # A None job ends this worker.
            break

        hits = 0.
        # Scratch buffers shared by both models' train_single_sent_id calls.
        # NOTE(review): sized from model1 only -- assumes model2.layer1_size
        # is not larger; TODO confirm.
        work = matutils.zeros_aligned(model1.layer1_size + 8, dtype=REAL)
        neu1 = matutils.zeros_aligned(model1.layer1_size + 8, dtype=REAL)

        for sent_tuple in job:
            cat_id_gold = sent_tuple[2]

            sent_vec1, cat_vec1 = model1.train_single_sent_id(
                [sent_tuple[0]], 20, work, neu1)
            # Zero-filled, as most_similar_catsent does for the same helper,
            # so the scores do not depend on whether catsentvec_sim_sum
            # overwrites or accumulates into its output buffer.
            sims1 = np.zeros(model1.pair_len, dtype=REAL)
            catsentvec_sim_sum(model1, sent_vec1, cat_vec1, sims1)

            sent_vec2, cat_vec2 = model2.train_single_sent_id(
                [sent_tuple[0]], 20, work, neu1)
            sims2 = np.zeros(model2.pair_len, dtype=REAL)
            catsentvec_sim_sum(model2, sent_vec2, cat_vec2, sims2)

            # Combine both models' scores. The combined index is looked up in
            # model1's pair table below, so this assumes both models share
            # the same pair ordering -- TODO confirm.
            sims1 += sims2
            #joint_catsentvec_sim_sum(pairtable, sent_vec1, cat_vec1, sent_vec2, cat_vec2, sims1)

            # Walk candidates from best to worst, keeping each category id
            # only the first (best-scoring) time it appears.
            seen_cats = set()
            nearest = []
            for top_cand in np.argsort(sims1)[::-1]:
                _, cat_no = model1.sent_cat_pair[top_cand]
                cat_id = model1.cat_id_list[cat_no]
                if cat_id in seen_cats:
                    continue
                seen_cats.add(cat_id)
                nearest.append(cat_id)
                if len(nearest) == topK:
                    break

            if cat_id_gold in nearest:
                hits += 1.

            # Same output under the Python 2 print statement and the
            # Python 3 print function.
            print("%s %s" % (nearest, cat_id_gold))

            # With no candidates at all there is no top prediction to record;
            # skip instead of raising IndexError on nearest[0].
            if nearest:
                row = confusion_mtx.setdefault(cat_id_gold, {})
                row[nearest[0]] = row.get(nearest[0], 0) + 1

        qout.put(hits)