Example #1
0
 def add_perm(self, rphrase, rperm):
     """Attach permutation ``rperm`` to phrase ``rphrase`` and store its record.

     Returns early (None) when ``rperm`` already has an index that is listed
     under the phrase. Otherwise a new permutation index is allocated, the
     per-permutation bookkeeping lists/dicts are extended, every element id
     of the permutation is linked to the new index, the notifier is told
     about each element id, and one record built from the concatenated
     element bit vectors is added to the bitvec db.

     Args:
         rphrase: phrase key; must already be present in the
             rphrase -> iphrase mapping.
         rperm: permutation key handed to ``get_perm_eids``.

     Raises:
         AssertionError: if ``rphrase`` is unknown, or if ``rperm`` is
             already stored but not attached to this phrase (that case is
             explicitly not handled yet).
     """
     idx_phrase = self.__d_rphrase_to_iphrase.get(rphrase, -1)
     assert idx_phrase != -1, 'Error. add_perm received for unknown rphrase: ' + str(
         rphrase)

     known_iperm = self.__d_rperm_to_iperm.get(rperm, -1)
     if known_iperm != -1 and known_iperm in self.__ll_iperms[idx_phrase]:
         # This permutation is already registered for this phrase.
         return

     if rperm in self.__l_rperms:
         print('Error. rperm', rperm,
               'passed to cl_bitvec_db already in __l_rperms')
         assert False, 'not dealt with yet'

     # The new permutation takes the next free slot at the end of the list.
     new_iperm = len(self.__l_rperms)
     self.__l_rperms.append(rperm)
     self.__d_rperm_to_iperm[rperm] = new_iperm
     self.__ll_iperms[idx_phrase].append(new_iperm)
     self.__l_perm_iphrase.append(idx_phrase)

     perm_eids = self.__phraseperms.get_perm_eids(rperm)
     num_els = len(perm_eids)
     self.__l_perm_len.append(num_els)
     self.__max_perm_len = max(self.__max_perm_len, num_els)

     # Concatenate the per-element bit vectors into one flat record while
     # recording which permutations each element id participates in.
     record_bits = []
     for eid in perm_eids:
         record_bits.extend(self.__el_nlb_mgr.get_bin_by_id(eid))
         self.__d_iel_to_l_iperm.setdefault(eid, set()).add(new_iperm)
         self.__nlb_mgr_notifier.notify_on_iel(eid)

     bitvecdb.add_rec(self.__hcbdb, num_els,
                      utils.convert_charvec_to_arr(record_bits))
Example #2
0
 def get_el_hd_recs(self, pos, hd, el, num_cands, cands_arr):
     """Query the bitvec db for candidate records matching ``el`` at ``pos``.

     Looks up the bit vector for ``el`` and forwards it, together with the
     candidate list, position and ``hd`` value, to
     ``bitvecdb.get_el_hd_recs_by_list``.

     Args:
         pos: position argument passed through to the db call.
         hd: passed through to the db call (presumably a Hamming-distance
             limit -- confirm against bitvecdb).
         el: element whose bit vector is fetched with ``get_el_bin``.
         num_cands: number of entries in ``cands_arr``; also the size of
             the output buffer.
         cands_arr: candidate array passed through to the db call.

     Returns:
         Tuple ``(count, out_arr)``: the value returned by the db call and
         the ``bitvecdb.intArray`` output buffer it was given.
     """
     out_irecs = bitvecdb.intArray(num_cands)
     query_arr = utils.convert_charvec_to_arr(
         self.__el_nlb_mgr.get_el_bin(el))
     num_found = bitvecdb.get_el_hd_recs_by_list(
         self.__hcbdb, out_irecs, cands_arr, num_cands, pos, hd, query_arr)
     return num_found, out_irecs
Example #3
0
 def get_close_recs(self, idb, plen, hd_thresh, l_qbits):
     """Filter the records of length ``plen`` by threshold against a query.

     The candidate set comes from ``self.get_plen_irecs(idb, plen)``; it is
     then passed to ``bitvecdb.get_thresh_recs_by_list`` along with the
     converted threshold vector and query bits.

     Args:
         idb: forwarded to ``get_plen_irecs``.
         plen: phrase length used for both the candidate lookup and the
             db call.
         hd_thresh: sequence converted with ``convert_intvec_to_arr``.
         l_qbits: query bits converted with ``convert_charvec_to_arr``.

     Returns:
         Tuple ``(count, out_arr)``: the value returned by the db call and
         the ``bitvecdb.intArray`` output buffer, sized to the number of
         candidates.
     """
     num_cands, cands_arr = self.get_plen_irecs(idb, plen)
     out_irecs = bitvecdb.intArray(num_cands)
     thresh_arr = utils.convert_intvec_to_arr(hd_thresh)
     query_arr = utils.convert_charvec_to_arr(l_qbits)
     num_hits = bitvecdb.get_thresh_recs_by_list(
         self.__hcbdb, out_irecs, plen, thresh_arr, cands_arr, num_cands,
         query_arr)
     return num_hits, out_irecs
Example #4
0
 def get_rperms_with_eid_at(self, idb, eid, pos, num_cands, cands_arr):
     """Ask the bitvec db which candidate records have ``eid`` at ``pos``.

     Fetches the bit vector for ``eid`` and forwards it to
     ``bitvecdb.get_irecs_with_eid_by_list`` with the candidate list.

     Args:
         idb: passed through to the db call.
         eid: element id whose bit vector is fetched with ``get_bin_by_id``.
         pos: position argument passed through to the db call.
         num_cands: number of entries in ``cands_arr``; also the size of
             the output buffer.
         cands_arr: candidate array passed through to the db call.

     Returns:
         Tuple ``(count, out_arr)``: the value returned by the db call and
         the ``bitvecdb.intArray`` output buffer it was given.
     """
     out_irecs = bitvecdb.intArray(num_cands)
     query_arr = utils.convert_charvec_to_arr(
         self.__el_nlb_mgr.get_bin_by_id(eid), bitvec_size)
     num_found = bitvecdb.get_irecs_with_eid_by_list(
         self.__hcbdb, out_irecs, idb, pos, cands_arr, num_cands, query_arr)
     return num_found, out_irecs
Example #5
0
	def get_cluster(self, l_phrase_bits):
		"""Return the centroid record indices the centroid db reports for a phrase.

		Args:
			l_phrase_bits: flat sequence of bits for the whole phrase; its
				length divided by the bit-vector size gives the phrase length.

		Returns:
			List of the first ``num_ret`` entries of the output buffer filled
			by ``bitvecdb.get_thresh_recs``, where ``num_ret`` is that call's
			return value.
		"""
		# Floor division: on Python 3 a plain `/` yields a float, which is not a
		# valid length argument for the extension call. `//` gives the same
		# integer result that `/` gave on Python 2.
		plen = len(l_phrase_bits) // self.__bitvec_size
		# Each cent is an array of el bitvecs, each cent is also an array of hd, one for each el
		num_recs = len(self.__l_cent_hd)
		ret_arr = bitvecdb.intArray(num_recs)
		# Empty placeholder array for the threshold argument.
		# NOTE(review): the meaning of the trailing False/True flags is defined
		# by bitvecdb.get_thresh_recs and is not visible here -- confirm.
		null_arr = bitvecdb.intArray(0)
		num_ret = bitvecdb.get_thresh_recs(	self.__hcdb_cent, ret_arr, plen, null_arr,
											utils.convert_charvec_to_arr(l_phrase_bits), False, True)
		return [ret_arr[i] for i in range(num_ret)]
Example #6
0
 def get_rec_rule_names(self, nd_cent, hd_thresh, plen, num_recs,
                        l_rule_names):
     """Return the rule names of the records the db clusters around ``nd_cent``.

     Args:
         nd_cent: object with ``tolist()`` holding the centroid bits.
         hd_thresh: sequence converted with ``convert_intvec_to_arr``.
         plen: phrase length passed through to ``bitvecdb.get_cluster``.
         num_recs: size of the output buffer and count passed to the db.
         l_rule_names: lookup indexed by rphrase.

     Returns:
         List with one entry of ``l_rule_names`` per returned record, in the
         order the db wrote them to its output buffer.
     """
     out_iperms = bitvecdb.intArray(num_recs)
     cent_arr = utils.convert_charvec_to_arr(nd_cent.tolist())
     thresh_arr = utils.convert_intvec_to_arr(hd_thresh)
     num_found = bitvecdb.get_cluster(self.__hcbdb, out_iperms, num_recs,
                                      cent_arr, plen, thresh_arr)

     names = []
     for slot in range(num_found):
         # record index -> phrase index -> rphrase -> rule name
         iphrase = self.__l_perm_iphrase[out_iperms[slot]]
         rphrase = self.__l_phrase_rphrases[iphrase]
         names.append(l_rule_names[rphrase])
     return names
Example #7
0
    def iel_bitvec_changed(self, iel, bitvec):
        """Rewrite every db record whose permutation contains element ``iel``.

        For each permutation index linked to ``iel``, the full record is
        rebuilt from the current bit vectors of all its element ids and
        written back with ``bitvecdb.change_rec``.

        Args:
            iel: element id whose bit vector changed.
            bitvec: not used by this method; the current vectors are
                re-read from the element manager instead.
        """
        for iperm in self.__d_iel_to_l_iperm.get(iel, set()):
            perm_eids = self.__phraseperms.get_perm_eids(self.__l_rperms[iperm])
            # Flatten the per-element bit vectors into a single record.
            record_bits = [
                bit
                for eid in perm_eids
                for bit in self.__el_nlb_mgr.get_bin_by_id(eid)
            ]
            bitvecdb.change_rec(self.__hcbdb, len(perm_eids),
                                utils.convert_charvec_to_arr(record_bits),
                                iperm)
Example #8
0
 def get_irecs_with_eid(self, idb, eid, rphrase_src, l_rphrase_excl):
     """Collect the rphrases of all records the db reports for element ``eid``.

     Args:
         idb: passed through to ``bitvecdb.get_irecs_with_eid``.
         eid: element id whose bit vector is fetched with ``get_bin_by_id``.
         rphrase_src: rphrase that is always left out of the result.
         l_rphrase_excl: iterable of further rphrases to leave out.

     Returns:
         Set of rphrases owning at least one returned record, minus
         ``rphrase_src`` and everything in ``l_rphrase_excl``.
     """
     # The output buffer is sized to hold every stored permutation.
     bufsize = len(self.__l_rperms)
     irec_arr = bitvecdb.intArray(bufsize)
     el_bitvec = self.__el_nlb_mgr.get_bin_by_id(eid)
     # NOTE(review): -1 is passed in the position slot; presumably it means
     # "any position" -- confirm against bitvecdb.
     num_ret = bitvecdb.get_irecs_with_eid(
         self.__hcbdb, irec_arr, idb, -1,
         utils.convert_charvec_to_arr(el_bitvec, bitvec_size))

     # Build the exclusion list once instead of re-allocating it on every
     # loop iteration; list() also lets callers pass any iterable.
     l_excluded = [rphrase_src] + list(l_rphrase_excl)

     s_rphrases_close = set()
     for iret in range(num_ret):
         # record index -> phrase index -> rphrase
         rphrase = self.__l_phrase_rphrases[self.__l_perm_iphrase[
             irec_arr[iret]]]
         if rphrase in l_excluded:
             continue
         s_rphrases_close.add(rphrase)
     return s_rphrases_close
Example #9
0
 def get_closest_recs(self, k, phrase_eids, iskip, shrink=0):
     """Fetch up to ``k`` records the db reports as closest to a phrase.

     The query is the concatenation of the bit vectors of ``phrase_eids``.

     Args:
         k: size of the output buffers handed to the db.
         phrase_eids: element ids making up the query phrase.
         iskip: passed through to ``bitvecdb.get_closest_recs``.
         shrink: passed through to ``bitvecdb.get_closest_recs``
             (default 0).

     Returns:
         Tuple ``(indices, hds, nd_obits)``: two lists with the first
         ``num_ret`` entries of the index and hd output buffers, and an
         ``int8`` array of shape ``(num_ret, bitvec_size)`` built from the
         character output buffer.
     """
     ret_arr = bitvecdb.intArray(k)
     hds_arr = bitvecdb.intArray(k)
     obits_arr = bitvecdb.charArray(k * bitvec_size)

     query_bits = [
         bit
         for eid in phrase_eids
         for bit in self.__el_nlb_mgr.get_bin_by_id(eid)
     ]
     num_ret = bitvecdb.get_closest_recs(
         self.__hcbdb, k, ret_arr, hds_arr, obits_arr, len(phrase_eids),
         utils.convert_charvec_to_arr(query_bits), iskip, shrink)

     l_irecs = [ret_arr[i] for i in range(num_ret)]
     l_hds = [hds_arr[i] for i in range(num_ret)]

     # The char buffer holds one character per bit; convert to small ints
     # and give each returned record its own row.
     flat_obits = [ord(obits_arr[ib]) for ib in range(num_ret * bitvec_size)]
     nd_obits = np.array(flat_obits, dtype=np.int8).reshape(
         (num_ret, bitvec_size))
     return l_irecs, l_hds, nd_obits
Example #10
0
	def process_clusters(self):
		"""Register every centroid in the centroid db, longest phrase length first.

		Iterates the per-length threshold lists in reverse index order. For
		each centroid it prints the closest debug word of every element
		slice, appends the flattened centroid and its thresholds to the
		instance lists, adds the centroid as a record to the centroid db and
		sets that record's thresholds.
		"""
		print('Cluster closest els:')
		thresh_by_plen = list(enumerate(self.__ll_cent_hd_thresh))
		for plen, l_cent_hd_thresh in reversed(thresh_by_plen):
			for i_lencent, hd_thresh in enumerate(l_cent_hd_thresh):
				# Index the new centroid will get once appended below.
				irec = len(self.__ll_centroids)
				# One debug word per element: slice the centroid into
				# bitvec-sized chunks and look up the closest word for each.
				close_phrase = [
					self.__nlb_mgr.dbg_closest_word(
						self.__l_nd_centroids[plen][i_lencent][
							iel * self.__bitvec_size:(iel + 1) * self.__bitvec_size])
					for iel in range(plen)
				]
				print('rcent:', irec, 'plen:', plen, ', hd:', hd_thresh, ',', close_phrase)

				flat_cent = np.reshape(self.__l_nd_centroids[plen][i_lencent], -1).tolist()
				self.__ll_centroids.append(flat_cent)
				self.__l_cent_hd.append(hd_thresh)
				bitvecdb.add_rec(self.__hcdb_cent, plen,
								 utils.convert_charvec_to_arr(flat_cent))
				thresh_arr = utils.convert_intvec_to_arr(hd_thresh)
				bitvecdb.set_hd_thresh(self.__hcdb_cent, irec, thresh_arr, len(hd_thresh))