def make_DC_for_first_level_children(root="*root*", firstChild='entity.n.01', wsChildrenDic=dict(),
                                     outputPath='', maxsize=0, mindim=0, word2ballDic=dict(),
                                     logFile=None):
    """
    Run the DC training step on the direct children of ``root``.

    The children are looked up with ``get_children`` and passed, in the order
    returned, to ``training_DC_by_name`` with ``ordered=True``.

    :param root: node whose direct children are processed
    :param firstChild: accepted but not used by the current implementation
    :param wsChildrenDic: forwarded to ``get_children`` and ``training_DC_by_name``
    :param outputPath: forwarded to ``training_DC_by_name``
    :param maxsize: accepted but not used by the current implementation
    :param mindim: accepted but not used by the current implementation
    :param word2ballDic: forwarded to ``training_DC_by_name``
    :param logFile: forwarded to ``training_DC_by_name``
    :return: whatever ``training_DC_by_name`` returns (the updated word2ballDic)
    """
    first_level = get_children(root, wsChildrenDic=wsChildrenDic)
    print('updating first level children...')
    return training_DC_by_name(first_level,
                               outputPath=outputPath,
                               wsChildrenDic=wsChildrenDic,
                               word2ballDic=word2ballDic,
                               ordered=True,
                               logFile=logFile)
def check_DC_for_sibilings_in_one_family(root="*root*", wsChildrenDic=dict(), word2ballDic=dict()):
    """
    Collect every pair of siblings below ``root`` whose balls fail ``qsr_DC``.

    The tree is walked depth-first with an explicit stack. For each node with
    at least two children, every unordered pair of children is tested once;
    failing pairs are printed (tagged 'violates condition 4') and recorded.

    :param root: node at which the traversal starts
    :param wsChildrenDic: forwarded to ``get_children``
    :param word2ballDic: maps a node name to the ball handed to ``qsr_DC``
    :return: list of ``(sibling_a, sibling_b)`` tuples that failed the test;
             empty when no pair failed
    """
    violations = []
    pending = [root]
    while pending:
        node = pending.pop()
        siblings = get_children(node, wsChildrenDic=wsChildrenDic)
        # Queue the children before deciding whether this node needs checking.
        pending.extend(siblings)
        if len(siblings) >= 2:
            for pos, first in enumerate(siblings):
                for second in siblings[pos + 1:]:
                    if qsr_DC(word2ballDic[first], word2ballDic[second]):
                        continue
                    print(first, second, 'violates condition 4')
                    violations.append((first, second))
    return violations
def check_P_for_child_parent_in_one_family(root=None, wsChildrenDic=dict(), word2ballDic=dict(), ballPath=""):
    """
    Check that every child ball below ``root`` passes ``qsr_P`` against its parent.

    The tree is walked depth-first with an explicit stack. The test itself uses
    the balls stored in ``word2ballDic``; the balls read from ``ballPath`` are
    only used for the diagnostic output printed on the first failure.

    :param root: node at which the traversal starts
    :param wsChildrenDic: forwarded to ``get_children``
    :param word2ballDic: maps a node name to the ball handed to ``qsr_P``
    :param ballPath: forwarded to ``get_ball_from_file``
    :return: ``[root]`` as soon as one child/parent pair fails, otherwise ``[]``
    """
    pending = [root]
    while pending:
        parent = pending.pop()
        parent_ball = get_ball_from_file(parent, ballPath=ballPath)
        offspring = get_children(parent, wsChildrenDic=wsChildrenDic)
        pending.extend(offspring)
        for child in offspring:
            child_ball = get_ball_from_file(child, ballPath=ballPath)
            if qsr_P(word2ballDic[child], word2ballDic[parent]):
                continue
            # First violation: print the numbers behind it and give up on this family.
            print(child, parent, 'violates condition 3')
            dis = dis_between_ball_centers(child_ball, parent_ball)
            print('dis:', dis)
            print('r1', child_ball[-1])
            print('R', parent_ball[-1])
            print('shall >=0', parent_ball[-1] - dis - child_ball[-1])
            return [root]
    return []
def testing_whole_family(outputPath=None, wsChildrenDic=dict(), word2ballDic=dict(), outputBallFile=None):
    """
    Verify the trained balls against the tree and, on success, merge them into one file.

    Balls are loaded from ``outputPath`` with ``load_balls``. The child/parent
    check is run once per direct child of '*root*'; the sibling check is run
    once starting from '*root*'. Only when both checks report nothing is
    ``merge_balls_into_file`` called.

    :param outputPath: directory passed to ``load_balls``, to the child/parent
                       check (as ``ballPath``) and to ``merge_balls_into_file``
    :param wsChildrenDic: forwarded to ``get_children`` and to both checks
    :param word2ballDic: forwarded to ``load_balls``; replaced by its result
    :param outputBallFile: target passed to ``merge_balls_into_file``
    :return: None
    """
    print(
        "checking whether the tree structure is perfectly encoded in nball embeddings...\n"
    )
    _maxsize, _mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)

    failed_P = []
    for family_root in get_children('*root*', wsChildrenDic=wsChildrenDic):
        failed_P.extend(
            check_P_for_child_parent_in_one_family(family_root,
                                                   wsChildrenDic=wsChildrenDic,
                                                   word2ballDic=word2ballDic,
                                                   ballPath=outputPath))

    failed_DC = []
    failed_DC.extend(
        check_DC_for_sibilings_in_one_family(root='*root*',
                                             wsChildrenDic=wsChildrenDic,
                                             word2ballDic=word2ballDic))

    print("failed families with P", failed_P)
    print("failed families with DC", failed_DC)

    if failed_P or failed_DC:
        print(
            "the tree structure is NOT perfectly encoded in nball embeddings.\n"
        )
        print("try again, or contact the author")
        return

    print("the tree structure is perfectly encoded in nball embeddings.\n")
    print("generating nball embedding file...\n")
    merge_balls_into_file(ipath=outputPath, outfile=outputBallFile)
def homothetic_recursive_transform_of_decendents(tree, root=None, rate=None, wsChildrenDic=dict(),
                                                 word2ballDic=dict(), outputPath=None):
    """
    Scale the balls below ``root`` by ``rate``, working bottom-up.

    Nothing happens when ``rate`` equals 1. Otherwise the children of ``tree``
    are processed recursively first. The node equal to ``root`` is not scaled
    itself. For every other node:

    1. the second-to-last ball entry ``l`` becomes ``l * rate`` and the last
       entry ``r`` becomes ``l * rate - (l - r) * rate`` (``l`` is converted to
       ``decimal.Decimal`` first);
    2. every pair of children with a negative ``qsr_DC_degree`` is passed to
       ``rotate_vector_till``;
    3. the last entry of the node's ball is increased until every child has a
       non-negative ``qsr_P_degree`` against it.

    Ball files are (re)written through ``create_ball_file`` when ``outputPath``
    is truthy.

    :param tree: node currently being processed
    :param root: node at which the transformation was started; it is left unscaled
    :param rate: scaling factor
    :param wsChildrenDic: forwarded to ``get_children``
    :param word2ballDic: maps a node name to its ball (a mutable sequence)
    :param outputPath: forwarded to ``create_ball_file``; falsy disables writing
    :return: the (possibly replaced) word2ballDic
    """
    if rate != 1:
        for descendant in get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic):
            word2ballDic = homothetic_recursive_transform_of_decendents(
                descendant,
                root=root,
                rate=rate,
                word2ballDic=word2ballDic,
                wsChildrenDic=wsChildrenDic,
                outputPath=outputPath)

        if tree == root:
            return word2ballDic

        # Scale the node itself.
        old_l = decimal.Decimal(word2ballDic[tree][-2])
        word2ballDic[tree][-2] = old_l * rate
        assert word2ballDic[tree][-2] != np.inf and word2ballDic[tree][-2] >= 0
        word2ballDic[tree][-1] = old_l * rate - (old_l - word2ballDic[tree][-1]) * rate
        if outputPath:
            create_ball_file(tree, outputPath=outputPath, word2ballDic=word2ballDic)

        # Re-separate any children that overlap after scaling.
        kids = get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic)
        kid_count = len(kids)
        for i in range(kid_count - 1):
            for j in range(i + 1, kid_count):
                dcDelta = qsr_DC_degree(word2ballDic[kids[i]], word2ballDic[kids[j]])
                if dcDelta < 0:
                    print(kids[j], kids[i], j, i)
                    word2ballDic = rotate_vector_till(kids[j],
                                                      kids[i],
                                                      word2ballDic=word2ballDic,
                                                      logFile='word2ball.log')
        if outputPath:
            for child in get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic):
                create_ball_file(child, outputPath=outputPath, word2ballDic=word2ballDic)

        # Enlarge this ball until it contains each child; the extra margin
        # grows tenfold after every unsuccessful attempt.
        for child in get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic):
            gap = 1
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
            while delta < 0:
                print('delta:', delta)
                word2ballDic[tree][-1] += -delta + gap
                gap *= 10
                delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
        if outputPath:
            create_ball_file(tree, outputPath=outputPath, word2ballDic=word2ballDic)
    return word2ballDic
def shift_whole_tree_of(tree, deltaVec, deltaL, wsChildrenDic=dict(), word2ballDic=dict(), outputPath=None):
    """
    Shift the ball of ``tree`` and of all its descendants by ``deltaL`` along ``deltaVec``.

    Children are shifted first (recursively). The node's ball is then rebuilt
    from its last two entries ``(l1, r1)`` and its leading entries (the vector):

    * the new second-to-last entry is
      ``sqrt(l1^2 + deltaL^2 + 2*l1*deltaL*cos(deltaVec, vector))``;
    * the new vector is ``vec_norm(vec_point(vector, l1) + vec_point(deltaVec, deltaL))``;
    * ``r1`` is kept.

    Afterwards every pair of children with a negative ``qsr_DC_degree`` is
    passed to ``rotate_vector_till``, and the last entry of the node's ball is
    increased until every child has a non-negative ``qsr_P_degree`` against it.

    :param tree: node currently being processed
    :param deltaVec: direction of the shift
    :param deltaL: length of the shift
    :param wsChildrenDic: forwarded to ``get_children``
    :param word2ballDic: maps a node name to its ball
    :param outputPath: forwarded to ``create_ball_file``; falsy disables writing
    :return: the (possibly replaced) word2ballDic
    """
    for descendant in get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(descendant,
                                           deltaVec,
                                           deltaL,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)

    # Move this node's own ball.
    l1, r1 = word2ballDic[tree][-2:]
    shifted_l = np.sqrt(l1 * l1 + deltaL * deltaL +
                        2 * l1 * deltaL * vec_cos(deltaVec, word2ballDic[tree][:-2]))
    shifted_vec = vec_norm(
        vec_point(word2ballDic[tree][:-2], l1) + vec_point(deltaVec, deltaL))
    word2ballDic[tree] = list(shifted_vec) + [shifted_l, r1]

    # Re-separate any children that overlap after the shift.
    kids = get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic)
    kid_count = len(kids)
    for i in range(kid_count - 1):
        for j in range(i + 1, kid_count):
            dcDelta = qsr_DC_degree(word2ballDic[kids[i]], word2ballDic[kids[j]])
            if dcDelta < 0:
                print(kids[j], kids[i], j, i)
                word2ballDic = rotate_vector_till(kids[j],
                                                  kids[i],
                                                  word2ballDic=word2ballDic,
                                                  logFile='word2ball.log')

    # Enlarge this ball until it contains each child; the extra margin is
    # doubled before every enlargement.
    for child in get_children(tree, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic):
        gap = 1
        delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
        while delta < 0:
            gap *= 2
            word2ballDic[tree][-1] += -delta + gap
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])

    if outputPath:
        create_ball_file(tree, outputPath=outputPath, word2ballDic=word2ballDic)
    return word2ballDic
def training_all_families(root="*root*", wsChildrenDic=dict(), word2vecDic=dict(), wscatCodeDic=dict(),
                          word2ballDic=dict(), outputPath=None, logFile=None, checking=False):
    """
    Train every family (sub-tree) rooted at a direct child of ``root``.

    The direct children of ``root`` are first ordered by the dot product of
    their vector with the vector of 'entity.n.01'. The last element is then
    popped and trained with ``training_one_family``, and the remaining children
    are re-ordered by their dot product with the child just trained. This
    repeats until no child is left.

    When ``logFile`` is given it is truncated at the start, and one line
    (family name, extra dimensions, UTC timestamp) is appended per family.
    Without ``logFile`` nothing is written by this function.

    :param root: node whose direct children are the family roots
    :param wsChildrenDic: forwarded to ``get_children`` and the training/checking helpers
    :param word2vecDic: forwarded to ``get_word2vector`` and ``training_one_family``
    :param wscatCodeDic: forwarded to ``training_one_family``
    :param word2ballDic: ball dictionary threaded through all training calls
    :param outputPath: forwarded to ``training_one_family``; also read by
                       ``load_balls`` and the checks when ``checking`` is true
    :param logFile: path of the progress log, or None to disable logging here
    :param checking: when true, reload the balls and run both structural checks
                     on every family after training
    :return: the word2ballDic produced by the last training (or loading) step
    """
    global L0, DIM

    def _similarity_to(anchor):
        # Look the anchor vector up once instead of once per compared element.
        anchor_vec = get_word2vector(anchor, word2vecDic=word2vecDic)
        return lambda ele: np.dot(anchor_vec, get_word2vector(ele, word2vecDic=word2vecDic))

    children = get_children(root, wsChildrenDic=wsChildrenDic)
    if children:
        children = sorted(children, key=_similarity_to('entity.n.01'))
    print(children)

    # np.log(0) is -inf and cannot be converted to int, so an empty child
    # list is handled explicitly.
    N = int(np.ceil(np.log(len(children)))) if children else 0

    if logFile:
        # Truncate the log; the context manager closes the handle right away.
        with open(logFile, 'w+'):
            pass

    k = 512
    while children:
        child = children.pop()

        # Binary digits of N, cut or zero-padded to DIM entries, mapped from
        # {0, 1} to {-k, +k}.
        addDim0 = list(bin(N))[2:][:DIM]
        if len(addDim0) < DIM:
            addDim0 += [0] * (DIM - len(addDim0))
        addDim = [(int(ele) * 2 - 1) * k for ele in addDim0]

        print("***", child)
        if logFile:
            with open(logFile, 'a+') as wlog:
                stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
                wlog.write(" ".join(str(ele) for ele in [child] + addDim + [stamp]))
                wlog.write("\n")

        word2ballDic = training_one_family(root=child,
                                           addDim=addDim,
                                           wsChildrenDic=wsChildrenDic,
                                           word2vecDic=word2vecDic,
                                           wscatCodeDic=wscatCodeDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath,
                                           logFile=logFile)
        if children:
            children = sorted(children, key=_similarity_to(child))

    print("finished training of all families\n")

    if checking:
        print("checking each family\n")
        maxsize, mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)
        failed_P, failed_DC = [], []
        # wsChildrenDic must be passed here as in every other get_children
        # call; without it the lookup would not see the tree being trained.
        for child in get_children(root, wsChildrenDic=wsChildrenDic):
            failed_P += check_P_for_child_parent_in_one_family(child,
                                                               word2ballDic=word2ballDic,
                                                               wsChildrenDic=wsChildrenDic,
                                                               ballPath=outputPath)
            failed_DC += check_DC_for_sibilings_in_one_family(root=child,
                                                              word2ballDic=word2ballDic,
                                                              wsChildrenDic=wsChildrenDic)
        print("failed families with P", failed_P)
        print("failed families with DC", failed_DC)
    return word2ballDic
def training_one_family(treeStruc=None, root=None, addDim=[], wsChildrenDic=dict(), word2vecDic=dict(),
                        wscatCodeDic=dict(), word2ballDic=dict(), outputPath=None, logFile=None):
    """
    Build the balls of the sub-tree rooted at ``root``, depth-first.

    A node without children gets its ball from ``initialize_ball`` (using the
    module-level ``L0`` and ``R0``). A node with children is handled after all
    of its children: if there are at least two children they are first
    separated with ``training_DC_by_name``; then ``making_ball_contains`` makes
    the node's ball contain its children, and the step is appended to
    ``ball_generation_log``.

    :param treeStruc: optional mapping from node to its children; when falsy
                      the children come from ``get_children``
    :param root: node currently being processed
    :param addDim: forwarded to ``initialize_ball`` / ``making_ball_contains``
    :param wsChildrenDic: forwarded to ``get_children`` and the training helpers
    :param word2vecDic: forwarded to ``initialize_ball`` / ``making_ball_contains``
    :param wscatCodeDic: forwarded to ``initialize_ball`` / ``making_ball_contains``
    :param word2ballDic: ball dictionary threaded through all calls
    :param outputPath: forwarded to the training helpers
    :param logFile: forwarded to the training helpers
    :return: the updated word2ballDic
    """
    if treeStruc:
        offspring = treeStruc[root]
    else:
        offspring = get_children(root, wsChildrenDic=wsChildrenDic)

    # Leaf: nothing to contain, just create the initial ball.
    if len(offspring) == 0:
        _, word2ballDic = initialize_ball(root,
                                          addDim=addDim,
                                          L0=L0,
                                          R0=R0,
                                          word2vecDic=word2vecDic,
                                          word2ballDic=word2ballDic,
                                          wscatCodeDic=wscatCodeDic,
                                          outputPath=outputPath)
        return word2ballDic

    for member in offspring:
        word2ballDic = training_one_family(treeStruc=treeStruc,
                                           root=member,
                                           addDim=addDim,
                                           wsChildrenDic=wsChildrenDic,
                                           word2vecDic=word2vecDic,
                                           wscatCodeDic=wscatCodeDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath,
                                           logFile=logFile)

    # Siblings have to be separated from each other before the parent is fitted.
    if len(offspring) > 1:
        word2ballDic = training_DC_by_name(offspring,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath,
                                           logFile=logFile)

    word2ballDic = making_ball_contains(root,
                                        offspring,
                                        addDim=addDim,
                                        word2vecDic=word2vecDic,
                                        wsChildrenDic=wsChildrenDic,
                                        wscatCodeDic=wscatCodeDic,
                                        word2ballDic=word2ballDic,
                                        outputPath=outputPath,
                                        logFile=logFile)
    entry = Log(key=root,
                operation=Operation.CONTAIN,
                operation_args=offspring,
                vector=word2ballDic[root])
    ball_generation_log.append(entry)
    return word2ballDic