Beispiel #1
0
	def find_var_opts_for_rules(self, goal_phrase, l_cat_names, l_rule_names, idb, var_obj_parent, calc_level):
		"""Collect one find_var_opts() result for every rule bitvecdb finds for goal_phrase.

		Category and rule names are translated to ids through the internal
		lookup tables; names that are missing from a table, or that map to a
		negative id, are dropped before the search.

		Args:
			goal_phrase: phrase handed to the phrase manager to obtain its rphrase.
			l_cat_names: rule category names passed on (as ids) to the search.
			l_rule_names: rule names passed on (as ids) to the search.
			idb: forwarded unchanged to find_var_opts().
			var_obj_parent: forwarded unchanged to find_var_opts().
			calc_level: forwarded unchanged to find_var_opts().

		Returns:
			List with the find_var_opts() result of each rule found, in the
			order bitvecdb reported them.
		"""
		print('find_rules_matching_result rules for', goal_phrase)
		l_rperms = self.__phraseperms.get_perms(self.__phrase_mgr.get_rphrase(goal_phrase))
		num_rperms = len(l_rperms)
		# Output buffers are sized for every active rule hitting every source perm.
		max_hits = len(self.__l_active_rules) * num_rperms
		irule_arr = bitvecdb.intArray(max_hits)
		rperms_ret_arr = bitvecdb.intArray(max_hits)
		num_vars_ret_arr = bitvecdb.intArray(max_hits)
		rperms_arr = utils.convert_intvec_to_arr(l_rperms)

		l_rcats = [cid for cid in (self.__d_rcats.get(name, -1) for name in l_cat_names) if cid >= 0]
		# cat_arr stays the plain int 0 when no category id could be resolved.
		cat_arr = utils.convert_intvec_to_arr(l_rcats) if l_rcats else 0

		l_rids = [rid for rid in (self.__d_rnames.get(name, -1) for name in l_rule_names) if rid >= 0]
		# The rule-id array is always built, even when it is empty.
		rid_arr = utils.convert_intvec_to_arr(l_rids)

		num_rules_found = bitvecdb.find_matching_rules_vo(
			self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
			self.__el_bitvec_mgr.get_hcbdb(), irule_arr,
			num_vars_ret_arr, rperms_ret_arr, num_rperms,
			rperms_arr, len(l_rcats), cat_arr, len(l_rids), rid_arr,
			False, -1)
		print('num_rules_found', num_rules_found)
		return [
			self.find_var_opts(idb, irule_arr[ifound], num_vars_ret_arr[ifound],
							   rperms_ret_arr[ifound], var_obj_parent, calc_level)
			for ifound in range(num_rules_found)
		]
Beispiel #2
0
	def get_cluster(self, l_phrase_bits):
		"""Return the record ids bitvecdb.get_thresh_recs() reports for a phrase.

		Args:
			l_phrase_bits: flat sequence of the phrase's bits. Its length is
				divided by the per-element bitvec size to get the phrase
				length, so it is presumably a whole multiple of that size.

		Returns:
			List of the ids written into the return buffer by bitvecdb.
		"""
		# Floor division keeps plen an int; true division would pass a float
		# to the C binding under Python 3.
		plen = len(l_phrase_bits) // self.__bitvec_size
		# Each cent is an array of el bitvecs, each cent is also an array of hd, one for each el
		num_recs = len(self.__l_cent_hd)
		ret_arr = bitvecdb.intArray(num_recs)
		null_arr = bitvecdb.intArray(0)
		num_ret = bitvecdb.get_thresh_recs(	self.__hcdb_cent, ret_arr, plen, null_arr,
											utils.convert_charvec_to_arr(l_phrase_bits), False, True)
		return [ret_arr[i] for i in range(num_ret)]
Beispiel #3
0
	def test_rule(self, mpdbs, stmt, l_results, idb, l_rule_cats):
		"""Run every rule that matches stmt and update the found / not-found test counters.

		Args:
			mpdbs: forwarded unchanged to run_one_rule().
			stmt: statement text; split with utils.full_split() to form the phrase.
			l_results: when non-empty, l_results[0][1:] is converted to the
				lowercase expected-result string that is printed for comparison.
			idb: forwarded unchanged to run_one_rule().
			l_rule_cats: rule category names, converted to ids with get_cid().

		Returns:
			None. Increments the "rules found" counter when at least one rule
			matched, otherwise the "rules not found" counter.
		"""
		phrase = utils.full_split(stmt)
		result_words = ''
		if l_results:
			result_words = ' '.join(utils.full_split(utils.convert_phrase_to_word_list(l_results[0][1:]))).lower()
		print('Testing rules for', phrase)
		rphrase = self.__phrase_mgr.get_rphrase(phrase)
		l_rperms = self.__phraseperms.get_perms(rphrase)
		# The maximum theoretical returns is the num of rules * the number of source perms
		num_poss_ret = len(self.__l_active_rules) * len(l_rperms)
		irule_arr = bitvecdb.intArray(num_poss_ret)
		rperms_ret_arr = bitvecdb.intArray(num_poss_ret)
		rperms_arr = utils.convert_intvec_to_arr(l_rperms)
		l_rule_cids = [self.get_cid(rule_cat) for rule_cat in l_rule_cats]
		num_vars_ret_arr = bitvecdb.intArray(num_poss_ret)
		num_rules_matched = 0
		num_rules_found = bitvecdb.find_matching_rules_vo(	self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
															self.__el_bitvec_mgr.get_hcbdb(), irule_arr,
															num_vars_ret_arr, rperms_ret_arr, len(l_rperms),
															rperms_arr, len(l_rule_cids), utils.convert_intvec_to_arr(l_rule_cids),
															0, utils.convert_intvec_to_arr([]), False, 0)
		print('num_rules_found', num_rules_found)
		for iret in range(num_rules_found):
			irule = irule_arr[iret]
			bext, iactive = self.__l_active_rules[irule]
			rperm_ret = rperms_ret_arr[iret]
			num_vars_ret = num_vars_ret_arr[iret]
			if not bext:
				# Learned rules are run through the same path as external rules.
				print('Will run learned rule', iactive, 'as standard rule', irule)
			print('should run rule called', self.__l_names[irule])
			_, num_matched, ll_result_eids = self.run_one_rule(irule, rperm_ret, result_words, mpdbs, idb, num_vars_ret)
			if num_matched < 1:
				print('rule', irule, 'did not match the state of the story db for idb', idb)
				continue
			# run_one_rule() can report matches while returning no result phrase,
			# so the first result may only be indexed when one exists.
			if ll_result_eids:
				print('test rule produced: ', ' '.join([self.__el_bitvec_mgr.get_el_by_eid(el) for el in ll_result_eids[0]]))
			else:
				print('test rule produced no result phrase for rule', irule)
			print('test expected result:', result_words)
			num_rules_matched += 1

		if num_rules_matched > 0:
			self.__test_stat_num_rules_found += 1
		else:
			self.__test_stat_num_rules_not_found += 1
Beispiel #4
0
 def init_db_for_cluster(self, cluster_min):
     """Configure the hd buckets and the cluster minimum of this object's bitvec db.

     The four bucket boundaries are 0, one fifth and two fifths of
     bitvec_size, and bitvec_size itself.

     Args:
         cluster_min: value passed to bitvecdb.set_cluster_min().
     """
     # Floor division keeps the boundaries integral; true division would put
     # floats into the C int array under Python 3.
     l_buckets = [0, bitvec_size // 5, 2 * bitvec_size // 5, bitvec_size]
     buckets_arr = bitvecdb.intArray(len(l_buckets))
     for ib, bhd in enumerate(l_buckets):
         buckets_arr[ib] = bhd
     bitvecdb.set_hd_buckets(self.get_hcbdb(), len(l_buckets), buckets_arr)
     bitvecdb.set_cluster_min(self.get_hcbdb(), cluster_min)
Beispiel #5
0
 def get_closest_recs(self, k, phrase_eids, iskip, shrink=0):
     """Query bitvecdb.get_closest_recs() with a phrase given as element ids.

     Args:
         k: capacity of the return buffers (record ids, hds, and k * bitvec_size bits).
         phrase_eids: element ids; the binary vector of each is concatenated
             to form the query.
         iskip: passed through to bitvecdb.get_closest_recs().
         shrink: passed through to bitvecdb.get_closest_recs().

     Returns:
         Tuple (record ids, hds, bits) where the first two are lists of
         length num_ret and the last is an int8 array of shape
         (num_ret, bitvec_size); num_ret is the count bitvecdb returned.
     """
     ret_arr = bitvecdb.intArray(k)
     hds_arr = bitvecdb.intArray(k)
     obits_arr = bitvecdb.charArray(k * bitvec_size)

     # Flatten the per-element bit vectors into one query vector.
     qdata = []
     for iel in phrase_eids:
         qdata.extend(self.__el_nlb_mgr.get_bin_by_id(iel))

     num_ret = bitvecdb.get_closest_recs(
         self.__hcbdb, k, ret_arr, hds_arr, obits_arr, len(phrase_eids),
         utils.convert_charvec_to_arr(qdata), iskip, shrink)

     found = range(num_ret)
     l_idexs_ret = [ret_arr[ir] for ir in found]
     l_hds_arr = [hds_arr[ir] for ir in found]
     # The bits come back as characters; ord() turns each into its integer value.
     flat_bits = [ord(obits_arr[ib]) for ib in range(num_ret * bitvec_size)]
     nd_obits = np.reshape(np.array(flat_bits, dtype=np.int8), (num_ret, bitvec_size))
     return l_idexs_ret, l_hds_arr, nd_obits
Beispiel #6
0
	def cluster_one_thresh(self, plen, recc_thresh):
		"""Repeatedly pull cluster seeds for phrases of length plen and score the outcome.

		Seeds are requested until fewer than c_cluster_min records are left or
		a request adds no record.

		Args:
			plen: phrase length in elements.
			recc_thresh: passed through to get_cluster_seed().

		Returns:
			Tuple (final_score, nd_centroids, l_hd_thresh). final_score is
			1000.0 when no cluster was formed or the clusters hold no hits;
			otherwise it is the number of clusters times the hit-weighted mean
			cluster score, plus 0.2 per record left over. nd_centroids is a
			uint8 array with one row per cluster and l_hd_thresh holds the
			per-element hd list of each cluster.
		"""
		num_left = self.__bdb_all.init_num_left_buf(plen)
		vec_len = self.__bitvec_size * plen
		cent_ret = bitvecdb.charArray(vec_len)
		hd_avg_ret = bitvecdb.floatArray(1)
		hd_thresh = bitvecdb.intArray(plen)

		l_clusters = []
		while num_left >= c_cluster_min:
			num_left_now = self.__bdb_all.get_cluster_seed(cent_ret, hd_avg_ret, hd_thresh, plen, recc_thresh)
			num_added = num_left - num_left_now
			print('py: cluster_one_thresh num_added is', num_added)
			num_left = num_left_now
			if num_added == 0:
				break
			l_clusters.append(nt_cluster(
				l_cent=[ord(cent_ret[ib]) for ib in range(vec_len)],
				hd=[hd_thresh[iel] for iel in range(plen)],
				score=hd_avg_ret[0], num_hit=num_added))

		num_clusters = len(l_clusters)
		nd_hd_cluster = np.array([cluster.score for cluster in l_clusters], dtype=np.float64)
		nd_num = np.array([cluster.num_hit for cluster in l_clusters], dtype=np.float64)
		l_hd_thresh = [cluster.hd for cluster in l_clusters]
		# Pre-sized so that the array keeps its two-dimensional shape with zero clusters.
		nd_centroids = np.zeros((num_clusters, vec_len), dtype=np.uint8)
		for icluster, cluster in enumerate(l_clusters):
			nd_centroids[icluster] = np.array(cluster.l_cent, dtype=np.uint8)

		total_hits = np.sum(nd_num)
		if not l_clusters or total_hits == 0:
			return 1000.0, nd_centroids, l_hd_thresh
		score = np.sum(nd_hd_cluster * nd_num) / total_hits
		# think about the magic number and put it into a constant
		final_score = (num_clusters * score) + (num_left * 0.2)
		return final_score, nd_centroids, l_hd_thresh
Beispiel #7
0
 def get_el_hd_recs(self, pos, hd, el, num_cands, cands_arr):
     """Call bitvecdb.get_el_hd_recs_by_list() for one element over a candidate list.

     Args:
         pos: passed through to bitvecdb (presumably the element position
             within the phrase; confirm against the C library).
         hd: passed through to bitvecdb.
         el: element whose binary vector is looked up with get_el_bin().
         num_cands: number of entries in cands_arr; also the size of the
             returned buffer.
         cands_arr: candidate record array handed to bitvecdb.

     Returns:
         Tuple (num_ret, irec_arr): the count reported by bitvecdb and the
         int array it filled.
     """
     out_arr = bitvecdb.intArray(num_cands)
     el_bits_arr = utils.convert_charvec_to_arr(self.__el_nlb_mgr.get_el_bin(el))
     num_found = bitvecdb.get_el_hd_recs_by_list(
         self.__hcbdb, out_arr, cands_arr, num_cands, pos, hd, el_bits_arr)
     return num_found, out_arr
Beispiel #8
0
 def get_close_recs(self, idb, plen, hd_thresh, l_qbits):
     """Run bitvecdb.get_thresh_recs_by_list() over the records get_plen_irecs() returns.

     Args:
         idb: passed to get_plen_irecs() to select the candidate records.
         plen: phrase length; used for the candidate lookup and the query.
         hd_thresh: sequence of ints converted to a C int array for the query.
         l_qbits: query bits converted to a C char array.

     Returns:
         Tuple (num_ret, ret_arr): the count reported by bitvecdb and the int
         array it filled. The array is sized to the candidate count.
     """
     num_cands, cands_arr = self.get_plen_irecs(idb, plen)
     out_arr = bitvecdb.intArray(num_cands)
     hd_arr = utils.convert_intvec_to_arr(hd_thresh)
     qbits_arr = utils.convert_charvec_to_arr(l_qbits)
     num_found = bitvecdb.get_thresh_recs_by_list(
         self.__hcbdb, out_arr, plen, hd_arr, cands_arr, num_cands, qbits_arr)
     return num_found, out_arr
Beispiel #9
0
 def get_rperms_with_eid_at(self, idb, eid, pos, num_cands, cands_arr):
     """Call bitvecdb.get_irecs_with_eid_by_list() for one element id over a candidate list.

     Args:
         idb: passed through to bitvecdb.
         eid: element id whose binary vector is looked up with get_bin_by_id().
         pos: passed through to bitvecdb.
         num_cands: number of entries in cands_arr; also the size of the
             returned buffer.
         cands_arr: candidate record array handed to bitvecdb.

     Returns:
         Tuple (num_ret, irec_arr): the count reported by bitvecdb and the
         int array it filled.
     """
     out_arr = bitvecdb.intArray(num_cands)
     el_bits_arr = utils.convert_charvec_to_arr(
         self.__el_nlb_mgr.get_bin_by_id(eid), bitvec_size)
     num_found = bitvecdb.get_irecs_with_eid_by_list(
         self.__hcbdb, out_arr, idb, pos, cands_arr, num_cands, el_bits_arr)
     return num_found, out_arr
Beispiel #10
0
 def get_rec_rule_names(self, nd_cent, hd_thresh, plen, num_recs,
                        l_rule_names):
     """Map the records bitvecdb.get_cluster() returns to entries of l_rule_names.

     Args:
         nd_cent: centroid; converted with .tolist() and then to a C char array.
         hd_thresh: sequence of ints converted to a C int array.
         plen: passed through to bitvecdb.get_cluster().
         num_recs: size of the return buffer handed to bitvecdb.
         l_rule_names: container indexed by rphrase.

     Returns:
         List with one l_rule_names entry per returned record, found by
         going from perm index to phrase index to rphrase.
     """
     iperm_arr = bitvecdb.intArray(num_recs)
     cent_arr = utils.convert_charvec_to_arr(nd_cent.tolist())
     hd_arr = utils.convert_intvec_to_arr(hd_thresh)
     num_found = bitvecdb.get_cluster(self.__hcbdb, iperm_arr, num_recs,
                                      cent_arr, plen, hd_arr)
     l_names = []
     for iret in range(num_found):
         iphrase = self.__l_perm_iphrase[iperm_arr[iret]]
         rphrase = self.__l_phrase_rphrases[iphrase]
         l_names.append(l_rule_names[rphrase])
     return l_names
Beispiel #11
0
 def get_irecs_with_eid(self, idb, eid, rphrase_src, l_rphrase_excl):
     """Return the rphrases of the records bitvecdb.get_irecs_with_eid() reports for eid.

     Args:
         idb: passed through to bitvecdb.
         eid: element id whose binary vector is looked up with get_bin_by_id().
         rphrase_src: rphrase that is left out of the result.
         l_rphrase_excl: list of further rphrases to leave out.

     Returns:
         Set of rphrases for the returned records, without rphrase_src and
         without any member of l_rphrase_excl.
     """
     irec_arr = bitvecdb.intArray(len(self.__l_rperms))
     el_bits_arr = utils.convert_charvec_to_arr(
         self.__el_nlb_mgr.get_bin_by_id(eid), bitvec_size)
     num_found = bitvecdb.get_irecs_with_eid(
         self.__hcbdb, irec_arr, idb, -1, el_bits_arr)
     # Each record goes from perm index to phrase index to rphrase.
     rphrases_found = (
         self.__l_phrase_rphrases[self.__l_perm_iphrase[irec_arr[iret]]]
         for iret in range(num_found))
     return {
         rphrase for rphrase in rphrases_found
         if rphrase not in [rphrase_src] + l_rphrase_excl
     }
Beispiel #12
0
def convert_intvec_to_arr(bin, size=-1):
	"""Copy the first size items of bin into a new bitvecdb.intArray.

	Args:
		bin: indexable sequence whose items can be converted with int().
		size: number of items to copy; -1 means len(bin).

	Returns:
		A bitvecdb.intArray holding int(bin[i]) for each i below size.
	"""
	count = len(bin) if size == -1 else size
	out_arr = bitvecdb.intArray(count)
	for idx in range(count):
		out_arr[idx] = int(bin[idx])
	return out_arr
Beispiel #13
0
 def get_plen_irecs(self, idb, plen):
     """Call bitvecdb.get_plen_irecs() with a buffer sized to the number of rperms.

     Args:
         idb: passed through to bitvecdb.
         plen: passed through to bitvecdb.

     Returns:
         Tuple (num_ret, irec_arr): the count reported by bitvecdb and the
         int array it filled.
     """
     irec_arr = bitvecdb.intArray(len(self.__l_rperms))
     return bitvecdb.get_plen_irecs(self.__hcbdb, irec_arr, plen, idb), irec_arr
Beispiel #14
0
	def find_var_opts(self, idb, irule, num_var_opts, rperm, var_obj_parent, calc_level):
		"""Run the bitvecdb var-opts calculation for one rule and wrap the outcome.

		The options of the matching rule are fetched from bitvecdb, registered
		as external vars on the var-opts handle, and the handle is evaluated.
		The resulting match phrases and combos are then read back. Only
		phrases whose elements are all of def type obj are kept as match
		phrases; phrases containing a like element are reported as open phrases.

		Args:
			idb: passed to bitvecdb.init_vo().
			irule: index of the rule.
			num_var_opts: number of var options; sizes the return buffers.
			rperm: source perm passed to bitvecdb.
			var_obj_parent: passed through to cl_var_match_opts.
			calc_level: cl_var_match_opts receives calc_level + 1.

		Returns:
			A cl_var_match_opts built from the match phrases, the combos made
			up only of all-obj phrases, and the open phrases.
		"""
		print('irule', irule, 'num vars ret', num_var_opts, 'for rperm', rperm)

		iel_ret = bitvecdb.intArray(num_var_opts)
		ivar_ret = bitvecdb.intArray(num_var_opts)
		src_iphrase_ret = bitvecdb.intArray(num_var_opts)
		src_iel_ret = bitvecdb.intArray(num_var_opts)
		bitvecdb.matching_rule_get_opt(
			self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
			self.__el_bitvec_mgr.get_hcbdb(),
			irule, rperm, iel_ret, ivar_ret,
			src_iphrase_ret, src_iel_ret, num_var_opts, True, -1)

		hvos = self.__hvos
		bitvecdb.init_vo(hvos, irule, idb, -1, rperm)
		for ivar in range(num_var_opts):
			print('iel', iel_ret[ivar], 'ivar', ivar_ret[ivar], 'src iphrase', src_iphrase_ret[ivar],
				  'src iel', src_iel_ret[ivar])
			bitvecdb.add_ext_var(hvos, ivar_ret[ivar], True, True, iel_ret[ivar], 0, ivar)
		bitvecdb.do_vo(hvos)

		c_l_match_phrases = []
		# For each bitvecdb match phrase: its index in c_l_match_phrases, or -1 if it is not all-obj.
		l_map_to_obj_only = []
		l_open_phrases = []
		num_c_match_phrases = bitvecdb.get_num_match_phrases(hvos)
		num_rule_stages = bitvecdb.get_rule_num_phrases(hvos)
		for imatch in range(num_c_match_phrases):
			istage = bitvecdb.get_match_phrase_istage(hvos, imatch)
			b_matched = bool(bitvecdb.get_match_phrase_b_matched(hvos, imatch))
			num_phrase_els = bitvecdb.get_num_phrase_els(hvos, imatch)
			match_phrase, open_phrase, b_all_obj = [], [], True
			for iel in range(num_phrase_els):
				def_type = def_type_table[bitvecdb.get_phrase_el_def_type(hvos, imatch, iel)]
				phrase_rval = bitvecdb.get_phrase_el_val(hvos, imatch, iel)
				if phrase_rval == -1:
					phrase_val = '(not found)'
				else:
					phrase_val = self.__el_bitvec_mgr.get_el_by_eid(phrase_rval)
				phrase_hd = bitvecdb.get_phrase_el_hd(hvos, imatch, iel)
				match_phrase.append(phrase_val)
				if def_type == rec_def_type.obj:
					open_phrase.append([rec_def_type.obj, phrase_val])
				elif def_type == rec_def_type.like:
					open_phrase.append([rec_def_type.like, phrase_val, phrase_hd])
					b_all_obj = False
				else:
					assert False, 'only rec_def_type obj and like should be possible in find_var_opts()'

			if not b_all_obj:
				l_map_to_obj_only.append(-1)
				l_open_phrases.append(open_phrase)
				continue
			l_map_to_obj_only.append(len(c_l_match_phrases))
			# A phrase counts as a result only for result rules and only in the last stage.
			c_l_match_phrases.append(nt_match_phrases(
				istage=istage, b_matched=b_matched, phrase=match_phrase,
				b_result=self.__l_bresults[irule] and (istage==(num_rule_stages-1))))

		c_l_match_iphrase_combos = []
		num_c_combos = bitvecdb.get_num_combos(hvos)
		c_combo_len = bitvecdb.get_combo_len(hvos)
		for icombo in range(num_c_combos):
			one_combo = [l_map_to_obj_only[bitvecdb.get_combo_val(hvos, icombo, ival)]
						 for ival in range(c_combo_len)]
			# Drop combos that refer to any phrase that is not all-obj.
			if -1 not in one_combo:
				c_l_match_iphrase_combos.append(one_combo)
		return cl_var_match_opts(
			irule, c_l_match_phrases, c_l_match_iphrase_combos,
			var_obj_parent, calc_level + 1, self.__l_bresults[irule],
			l_open_phrases)
Beispiel #15
0
	def run_one_rule(self, irule, src_rperm, result_words, mpdbs, idb, num_var_opts):
		"""Run one rule inside bitvecdb and collect the element ids of its matched result phrases.

		The options of the matching rule are fetched from bitvecdb, registered
		as external vars on the var-opts handle, and the rule is run. Result
		phrases are the matched phrases of the rule's last stage.

		Args:
			irule: index of the rule.
			src_rperm: source perm passed to bitvecdb.
			result_words: not used by this method; kept for interface compatibility.
			mpdbs: not used by this method; kept for interface compatibility.
			idb: passed to bitvecdb.init_vo().
			num_var_opts: number of var options; sizes the return buffers.

		Returns:
			Tuple (b_has_result, num_matched, ll_result_eids). When
			bitvecdb.run_rule() reports no result the tuple is
			(False, num_matched, []); otherwise ll_result_eids holds one list
			of element ids per matched phrase of the last stage.
		"""
		iel_ret = bitvecdb.intArray(num_var_opts)
		ivar_ret = bitvecdb.intArray(num_var_opts)
		src_iphrase_ret = bitvecdb.intArray(num_var_opts)
		src_iel_ret = bitvecdb.intArray(num_var_opts)
		bitvecdb.matching_rule_get_opt(self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
											  self.__el_bitvec_mgr.get_hcbdb(),
											  irule, src_rperm, iel_ret, ivar_ret,
											  src_iphrase_ret, src_iel_ret, num_var_opts, False, 0)
		bitvecdb.init_vo(self.__hvos, irule, idb, -1, src_rperm, False, 0)
		for ivar in range(num_var_opts):
			print('iel', iel_ret[ivar], 'ivar', ivar_ret[ivar], 'src iphrase', src_iphrase_ret[ivar],
				  'src iel', src_iel_ret[ivar])
			bitvecdb.add_ext_var(self.__hvos, ivar_ret[ivar], True, True, iel_ret[ivar], 0, ivar)
		# One-element array used as an out-parameter for the match count.
		num_matched_ret = bitvecdb.intArray(1)
		b_has_result = bitvecdb.run_rule(self.__hvos, num_matched_ret)
		if not b_has_result:
			return False, num_matched_ret[0], []
		num_match_phrases = bitvecdb.get_num_match_phrases(self.__hvos)
		num_rule_stages = bitvecdb.get_rule_num_phrases(self.__hvos)
		# The result phrase is the last stage of the rule.
		result_iphrase = num_rule_stages - 1
		ll_result_eids = []
		for imatch in range(num_match_phrases):
			istage = bitvecdb.get_match_phrase_istage(self.__hvos, imatch)
			if istage != result_iphrase: continue
			b_matched = bool(bitvecdb.get_match_phrase_b_matched(self.__hvos, imatch))
			if not b_matched: continue
			num_phrase_els = bitvecdb.get_num_phrase_els(self.__hvos, imatch)
			l_eids = []
			for iel in range(num_phrase_els):
				i_def_type = bitvecdb.get_phrase_el_def_type(self.__hvos, imatch, iel)
				def_type = def_type_table[i_def_type]
				assert def_type == rec_def_type.obj, 'Error! Run rule should produce matched phrases with b_match that has only rec_def_type.obj'
				l_eids.append(bitvecdb.get_phrase_el_val(self.__hvos, imatch, iel))
			ll_result_eids.append(l_eids)

		return b_has_result, num_matched_ret[0], ll_result_eids