def find_cluster(seg_snt, means):
    """Pick the entry of ``means`` that scores highest against a segment.

    Every entry of ``means`` is scored against ``seg_snt.vector`` with
    ``sim.cos``; the first entry whose score is strictly the highest wins.

    Returns:
        An ``(index, score)`` tuple for the winning entry. When ``means`` is
        empty, or no score exceeds the -10000.0 starting threshold, the
        result is ``(-1, -10000.0)``.
    """
    best_idx = -1
    best_score = -10000.0
    for idx, mean in enumerate(means):
        candidate = sim.cos(seg_snt.vector, mean)
        # Strict comparison: on ties the earliest entry keeps the slot.
        if candidate > best_score:
            best_idx, best_score = idx, candidate
    return best_idx, best_score
def calEntityPairScoreTransE(self, enIdx1, enIdx2):
    """Score every relation against the offset between two entities.

    The entities are looked up by index in ``self.train.get_all_entity()``.
    For each ``i`` in ``range(len(self.R))`` the score is
    ``cos(self.E[all_relation[i]], minusVector(self.E[e2], self.E[e1]))``,
    where ``all_relation`` is ``self.train.get_all_relation()``.

    Args:
        enIdx1: Index of the first entity in ``get_all_entity()``.
        enIdx2: Index of the second entity in ``get_all_entity()``.

    Returns:
        A list of ``len(self.R)`` scores, in relation order. Only the scores
        are returned; they are not paired with relation names.
    """
    entities = self.train.get_all_entity()
    e1 = entities[enIdx1]
    e2 = entities[enIdx2]
    all_relation = self.train.get_all_relation()

    relation_count = len(self.R)
    if relation_count == 0:
        # Nothing to score, so skip the embedding lookups entirely.
        return []

    # The entity offset is identical for every relation: compute it once
    # instead of once per relation.
    offset = minusVector(self.E[e2], self.E[e1])
    return [cos(self.E[all_relation[i]], offset) for i in range(relation_count)]
def calEntityPairScoreTransE(self, enIdx1, enIdx2):
    """Score every relation against the offset between two entities.

    The entities are looked up by index in ``self.train.get_all_entity()``.
    For each ``i`` in ``range(len(self.R))`` the score is
    ``cos(self.Rel[all_relation[i]], minusVector(self.Ent[e2], self.Ent[e1]))``,
    where ``all_relation`` is ``self.train.get_all_relation()``.

    This is best-effort: any exception raised while scoring is printed
    together with the entity pair, and an empty list is returned.

    Args:
        enIdx1: Index of the first entity in ``get_all_entity()``.
        enIdx2: Index of the second entity in ``get_all_entity()``.

    Returns:
        A list of ``len(self.R)`` scores (scores only, not paired with
        relation names), or ``[]`` when scoring failed.
    """
    # Bound before the try so the handler can always report them, even when
    # the failure happens during the entity lookup itself.
    e1 = e2 = None
    try:
        entities = self.train.get_all_entity()
        e1 = entities[enIdx1]
        e2 = entities[enIdx2]
        all_relation = self.train.get_all_relation()
        relation_count = len(self.R)
        if relation_count:
            # The entity offset is the same for every relation: compute once.
            offset = minusVector(self.Ent[e2], self.Ent[e1])
            scores = [cos(self.Rel[all_relation[i]], offset)
                      for i in range(relation_count)]
        else:
            scores = []
    except Exception as e:
        # Deliberately broad: callers rely on getting [] back on failure.
        # The handler must only touch names that are guaranteed to be bound,
        # otherwise it raises itself and the fallback is lost.
        print(e)
        print("entities: %r (index %r), %r (index %r)" % (e1, enIdx1, e2, enIdx2))
        scores = []
    return scores
def calc_doc_sim(sdoci, sdocj):
    """Return ``sim.cos`` applied to the two documents' ``vector`` attributes."""
    vec_i = sdoci.vector
    vec_j = sdocj.vector
    return sim.cos(vec_i, vec_j)
# Report sim.support_mult for the remaining candidate sets, one per option.
for _label, _itemset in (('B:', set2), ('C:', set3), ('D:', set4)):
    print(_label, sim.support_mult(p_list, _itemset))

# naive bayes
class_list = [0] * 3 + [1] * 3 + [2] * 3
indx_list = [0, 1]
equal_to = [1, 0]
p = sim.naive_bayes(p_list, class_list, 2, indx_list, equal_to, 3)
print('p:', p)

# similarity measures
smc = sim.smc(P2, P6)
j = sim.j_coeff(P2, P7)
cos6 = sim.cos(P2, P6)
cos7 = sim.cos(P2, P7)
# Each option is a pairwise comparison between the measures computed above.
for _label, _holds in (
    ('A:', smc > j),
    ('B:', smc > cos6),
    ('C:', cos7 > j),
    ('D:', cos6 > cos7),
):
    print(_label, _holds)

# confidence of the rule X --> Y, with a_left = X and a_right = Y
a_left, a_right = [1, 3, 5], [0, 4]

# lift: there is no script for this, but it is easy to calculate
# from the conf and support scripts
import math  # local to this script section: needed for the tolerant float comparison below

# Confidence of the rule a_left --> a_right (a_left is set earlier in the script).
a_right = [hpH]
print('conf:', sim.conf(o_list, a_left, a_right))

# sim.support_mult of each candidate set, one per answer option.
print('A:', sim.support_mult(o_list, A))
print('B:', sim.support_mult(o_list, B))
print('C:', sim.support_mult(o_list, C))
print('D:', sim.support_mult(o_list, D))

a = [1, 0, 1, 0, 0, 1]
b = [1, 0, 1, 0, 1, 0]
c = [0, 0, 0, 0, -1, 1]
print('A:', sim.euclid_norm(c))
print('B:', sim.p_norm(c, 1) < sim.euclid_norm(c))
print('C:', sim.j_coeff(a, b))
# Both measures are floats; exact `==` can report False for values that are
# mathematically equal but differ by rounding, so compare with a tolerance.
print('D:', math.isclose(sim.cos(a, b), sim.smc(a, b)))

# high: z = 0
# low: z = 1
class_list = [1, 1, 1, 1, 0, 0, 0, 1]
C = 2  # NOTE: rebinds C, which was passed to support_mult above
indx_list = [hpL, am0]
class_check = 0
p = sim.naive_bayes(o_list, class_list, class_check, indx_list, [1, 1], C)
print('p:', p)

# Rows of a pairwise table; each row is 0 at its own index.
# Presumably distances between observations - TODO confirm.
p1 = [0, 0.2606, 1.1873, 2.4946, 2.9510, 2.5682, 3.4535, 2.4698]
p2 = [0.2606, 0, 1.2796, 2.4442, 2.8878, 2.4932, 3.3895, 2.4216]
p3 = [1.1873, 1.2796, 0, 2.8294, 3.6892, 2.9147, 4.1733, 2.2386]
p4 = [2.4946, 2.4442, 2.8294, 0, 1.4852, 0.2608, 2.2941, 1.8926]
import math  # local to this script section: needed for the tolerant float comparison below

# Confidence of the rule a_left --> a_right.
a_left = [0, 1, 2, 3, 4]
a_right = [5]
print('conf:', sim.conf(p_list, a_left, a_right))

# Naive Bayes query via sim.naive_bayes.
indx_list = [0, 1]
class_list = [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
equal_to = [1, 1]
p = sim.naive_bayes(p_list, class_list, 1, indx_list, equal_to, 2)
print('p:', p)

# Similarity measures between P1 and P3, then one comparison per option.
j = sim.j_coeff(P1, P3)
smc = sim.smc(P1, P3)
cos = sim.cos(P1, P3)
print('A:', j < smc)
print('B:', j > cos)
print('C:', smc > cos)
# Exact `==` on floats can miss a mathematically equal value because of
# rounding, so compare against 3/15 with a tolerance.
print('D:', math.isclose(cos, 3 / 15))

print('d', sim.p_norm([-0.25, -0.25], 1))

# One round for sim.ada_boost: 5 True entries followed by 20 False entries.
e = [True, True, True, True, True]
e1 = [False] * 20
print('ada:', sim.ada_boost([e + e1]))
# Decision-tree split evaluated with sim.dec_tree_ce.
root, splits = [3, 1, 3], [[1, 1, 0], [2, 0, 3]]
print('dec:', sim.dec_tree_ce(root, splits))

# Observations o1..o8, collected in order into o_list.
o1 = [4, 7, 9, 5, 5, 5, 6]
o2 = [4, 7, 7, 7, 3, 7, 8]
o3 = [7, 7, 10, 6, 6, 4, 9]
o4 = [9, 7, 10, 8, 6, 10, 9]
o5 = [5, 7, 6, 8, 8, 6, 7]
o6 = [5, 3, 6, 6, 8, 8, 11]
o7 = [5, 7, 4, 10, 6, 8, 7]
o8 = [6, 8, 9, 9, 7, 11, 7]
o_list = [o1, o2, o3, o4, o5, o6, o7, o8]

# The same pair (o1, o3) under three different measures, one per option.
for _label, _measure in (('A:', sim.cos), ('B:', sim.j_coeff), ('C:', sim.smc)):
    print(_label, _measure(o1, o3))

class_list = [1] * 4 + [0] * 4
print('knn:', sim.knn(o_list, class_list, 1, [0, 1]))

# Ratio of the two sim.knn_density values (o1 over o2).
_density_o1 = sim.knn_density(o1, 1)
_density_o2 = sim.knn_density(o2, 1)
ard = _density_o1 / _density_o2
print('ard:', ard)

# Binary vectors s1..s6.
s1 = [0, 1, 1, 0, 1, 0]
s2 = [0, 1, 1, 1, 0, 1]
s3 = [1, 1, 1, 0, 1, 0]
s4 = [1, 1, 1, 0, 1, 0]
s5 = [0, 1, 1, 0, 1, 1]
s6 = [0, 0, 1, 1, 1, 1]