def fix_dim(maxsize, mindim, word2ballDic=None,
            bPath='/Users/tdong/data/glove/glove.6B/glove.6B.50Xball',
            outputPath=""):
    """
    Pad every ball stored under ``bPath`` that has fewer than ``maxsize``
    numbers.

    Each file in ``bPath`` is expected to hold one ball on its first line as
    whitespace-separated numbers; the last two numbers are kept as they are
    and everything before them is treated as the centre vector.  A short ball
    gets ``mindim`` appended to its centre until the whole ball has
    ``maxsize`` numbers, the padded centre is passed through ``vec_norm`` and
    the result is stored in ``word2ballDic`` under the file name.

    :param maxsize: target number of entries of a ball (centre + last two)
    :param mindim: value used for every padded centre dimension
    :param word2ballDic: dict to update; a fresh dict is created when omitted
    :param bPath: directory holding one ball file per word
    :param outputPath: when truthy, each padded ball is written back with
        ``create_ball_file``
    :return: ``word2ballDic`` (the passed-in object, updated in place)
    """
    if word2ballDic is None:
        # a literal ``dict()`` default would be shared between calls
        word2ballDic = {}

    for bf in os.listdir(bPath):
        # Read first, then close the file: the ball may be rewritten below.
        with open(os.path.join(bPath, bf), 'r') as ifh:
            wlst = ifh.readline().strip().split()
        if not wlst:
            # an empty file carries no ball; nothing to pad
            continue

        ballv = [decimal.Decimal(ele) for ele in wlst]
        delta = maxsize - len(ballv)
        if delta <= 0:
            continue

        print(bf, len(wlst), ballv[-1])
        padding = [decimal.Decimal(mindim)] * delta
        word2ballDic[bf] = vec_norm(ballv[:-2] + padding) + ballv[-2:]
        if outputPath:
            # NOTE(review): the ball is written to bPath (in place), not to
            # outputPath, which only acts as an on/off switch here -- confirm
            # this is intended.
            create_ball_file(bf, outputPath=bPath, word2ballDic=word2ballDic)
    return word2ballDic
def initialize_ball(root, addDim=None, L0=0.1, R0=0.1, word2vecDic=None,
                    wscatCodeDic=None, word2ballDic=None, outputPath=None):
    """
    Create the initial ball of ``root`` and register it in ``word2ballDic``.

    The centre is the concatenation of
      * the word vector of ``root`` (each component multiplied by 100),
      * the category code ``wscatCodeDic[root]`` (each component plus 10),
      * ``addDim``,
    passed through ``vec_norm``; ``L0`` and ``R0`` are appended as the last
    two entries of the ball.  The initialisation is also appended to
    ``ball_generation_log``.

    :param root: name of the node to initialise
    :param addDim: extra dimensions appended to the centre (default: none)
    :param L0: value stored as second-to-last entry of the ball
    :param R0: value stored as last entry of the ball
    :param word2vecDic: passed to ``get_word2vector``
    :param wscatCodeDic: maps a node name to its category code
    :param word2ballDic: dict to update; a fresh dict is created when omitted
    :param outputPath: when truthy, the ball is written with
        ``create_ball_file``
    :return: ``(ball of root, word2ballDic)``
    """
    # ``is None`` (not ``or``): an explicitly passed empty dict must be the
    # object that gets updated and returned.
    addDim = [] if addDim is None else addDim
    word2vecDic = {} if word2vecDic is None else word2vecDic
    wscatCodeDic = {} if wscatCodeDic is None else wscatCodeDic
    word2ballDic = {} if word2ballDic is None else word2ballDic

    w2v = [decimal.Decimal(ele * 100)
           for ele in get_word2vector(root, word2vecDic=word2vecDic)]
    # add catcode -> path from root to here,
    # e.g. 1 1 2 :> first tree, first child, second child
    cpoint = w2v + [ele + 10 for ele in wscatCodeDic[root]] + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]

    print(f"Initialize Ball for root: {root}")
    # NOTE(review): the logged vector is the very list stored in
    # word2ballDic, so later in-place edits of that list would show up in the
    # log entry -- confirm whether a copy is wanted.
    ball_generation_log.append(
        Log(key=root, operation=Operation.INITIALIZE, operation_args=[],
            vector=word2ballDic[root]))

    if outputPath:
        create_ball_file(root, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
def homothetic_recursive_transform_of_decendents(tree, root=None, rate=None,
                                                 wsChildrenDic=dict(),
                                                 word2ballDic=dict(),
                                                 outputPath=None):
    """
    Scale the balls below ``root`` by the factor ``rate`` (post-order).

    For every node visited, its children are processed first.  The node
    ``root`` itself is left untouched.  For any other node:

      1. the second-to-last entry ``l`` becomes ``l * rate`` and the last
         entry ``r`` becomes ``l * rate - (l - r) * rate`` (algebraically
         ``r * rate``);
      2. every pair of children whose ``qsr_DC_degree`` is negative is handed
         to ``rotate_vector_till``;
      3. the last entry of the node is increased until ``qsr_P_degree`` of
         each child against the node is no longer negative.

    Nothing is done at all when ``rate`` equals 1.

    :param tree: node currently being processed
    :param root: node at which the scaling started (not scaled itself)
    :param rate: scaling factor
    :param wsChildrenDic: passed to ``get_children``
    :param word2ballDic: maps node name to its ball
    :param outputPath: when truthy, changed balls are written with
        ``create_ball_file``
    :return: word2ballDic
    """
    if rate == 1:
        return word2ballDic

    # descendants first
    for kid in get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        word2ballDic = homothetic_recursive_transform_of_decendents(
            kid, root=root, rate=rate, word2ballDic=word2ballDic,
            wsChildrenDic=wsChildrenDic, outputPath=outputPath)

    if tree == root:
        return word2ballDic

    # scale the ball of this node
    old_l = decimal.Decimal(word2ballDic[tree][-2])
    scaled_l = old_l * rate
    word2ballDic[tree][-2] = scaled_l
    assert scaled_l != np.inf and scaled_l >= 0
    word2ballDic[tree][-1] = scaled_l - (old_l - word2ballDic[tree][-1]) * rate
    if outputPath:
        create_ball_file(tree, outputPath=outputPath,
                         word2ballDic=word2ballDic)

    # check every pair of children
    siblings = get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic)
    for ref_pos in range(len(siblings) - 1):
        for cur_pos in range(ref_pos + 1, len(siblings)):
            ref_name, cur_name = siblings[ref_pos], siblings[cur_pos]
            dcDelta = qsr_DC_degree(word2ballDic[ref_name],
                                    word2ballDic[cur_name])
            if dcDelta < 0:
                print(cur_name, ref_name, cur_pos, ref_pos)
                word2ballDic = rotate_vector_till(
                    cur_name, ref_name, word2ballDic=word2ballDic,
                    logFile='word2ball.log')

    if outputPath:
        for kid in get_children(tree, wsChildrenDic=wsChildrenDic,
                                word2ballDic=word2ballDic):
            create_ball_file(kid, outputPath=outputPath,
                             word2ballDic=word2ballDic)

    # enlarge this ball until qsr_P_degree is non-negative for each child;
    # the extra margin grows tenfold per retry
    for kid in get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])
            if not delta < 0:
                break
            print('delta:', delta)
            word2ballDic[tree][-1] += -delta + gap
            gap *= 10

    if outputPath:
        create_ball_file(tree, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic
def ratio_homothetic_DC_transform(curTree, refTree, wsChildrenDic=None,
                                  word2ballDic=None, outputPath=None,
                                  logFile=None):
    """
    Update the ball of curTree so that it is disconnected from refTree.

    Notation: the last two entries of a ball are called l and r below.

    step 1: while k = r1 / l1 of curTree is >= 1, the whole tree of curTree
            is shifted along its own centre vector with
            ``shift_whole_tree_of`` so that l becomes r / targetsin0;
            targetsin0 starts at 0.6 and is multiplied by 0.9 per retry.

            Background from the original notes:
              curTree central point P1, l1 = |OP1|, radius r1, k = r1 / l1
              refTree central point P0, l0 = |OP0|, radius r0
              (r0 + k*x)^2 = l0^2 + x^2 - 2*l0*x*cos(alpha)
              x < (l0 + r0) / (1 - k) on the same line

    step 2: l and r of curTree are multiplied by
            ratio = (margin + l0 + r0) / (l - r)
            until ``qsr_DC_degree(curTree, refTree)`` is positive.  After a
            failed attempt the decimal context precision is raised by 10
            (this changes the thread's decimal context and is not undone)
            and margin is multiplied by 10.

    Only the ball of curTree itself is scaled in step 2; nothing in this
    function rescales its descendants.

    :param curTree: name of the tree to move
    :param refTree: name of the tree that stays fixed
    :param wsChildrenDic: passed to ``shift_whole_tree_of``
    :param word2ballDic: maps node name to its ball
    :param outputPath: when truthy, the ball of curTree is written with
        ``create_ball_file``
    :param logFile: path of a log file to append to; logging is skipped when
        it is None or empty
    :return: ``(ratio of the last scaling step, word2ballDic)``
    """
    wsChildrenDic = {} if wsChildrenDic is None else wsChildrenDic
    word2ballDic = {} if word2ballDic is None else word2ballDic

    ball1 = word2ballDic[curTree]
    l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
    ball0 = word2ballDic[refTree]
    l0, r0 = decimal.Decimal(ball0[-2]), decimal.Decimal(ball0[-1])

    # step 1: shift curTree away until r1 / l1 < 1
    k = r1 / l1
    targetsin0 = 0.6
    while k >= 1:
        print("assertion -1 k=", k)
        L, R = word2ballDic[curTree][-2:]
        print('Shifting...', curTree)
        LNew = R / decimal.Decimal(targetsin0)
        if logFile:
            with open(logFile, 'a+') as wlog:
                wlog.write(" ".join(
                    ["shifting", str(curTree)]
                    + [str(ele) for ele in word2ballDic[curTree][:-2]]
                    + [str(LNew - L)]))
                wlog.write("\n")
        word2ballDic = shift_whole_tree_of(
            curTree, word2ballDic[curTree][:-2], LNew - L,
            wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic,
            outputPath=outputPath)
        print('Ended of shifting...', curTree)
        ball1 = word2ballDic[curTree]
        l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
        k = r1 / l1
        targetsin0 *= 0.9

    # step 2: scale the ball of curTree until it is disconnected from refTree
    margin = 10
    while True:
        cur_ball = word2ballDic[curTree]
        assert cur_ball[-2] != np.inf and cur_ball[-2] >= 0
        ratio = (decimal.Decimal(margin + l0 + r0)
                 / decimal.Decimal(cur_ball[-2] - cur_ball[-1]))
        old_l = cur_ball[-2]
        cur_ball[-2] = old_l * ratio
        cur_ball[-1] = old_l * ratio - (old_l - cur_ball[-1]) * ratio
        delta = qsr_DC_degree(word2ballDic[curTree], word2ballDic[refTree])
        if delta > 0:
            break
        # retry with more working precision and a larger safety margin
        decimal.getcontext().prec += 10
        margin *= 10

    if outputPath:
        create_ball_file(curTree, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    if logFile:
        # NOTE(review): only the ratio of the last attempt is logged, even if
        # several scaling attempts were applied -- confirm this is enough for
        # whoever reads the log.
        with open(logFile, 'a+') as wlog:
            wlog.write(" ".join(["h**o", str(curTree)] + [str(ratio)]))
            wlog.write("\n")
    return ratio, word2ballDic
def shift_whole_tree_of(tree, deltaVec, deltaL, wsChildrenDic=dict(),
                        word2ballDic=dict(), outputPath=None):
    """
    Shift ``tree`` and all of its descendants by ``deltaL`` along ``deltaVec``
    (post-order: children are shifted before their parent).

    For each node, with l1 and r1 the last two entries of its ball:

      * the new second-to-last entry is
        sqrt(l1^2 + deltaL^2 + 2 * l1 * deltaL * vec_cos(deltaVec, centre));
      * the new centre is
        vec_norm(vec_point(centre, l1) + vec_point(deltaVec, deltaL));
      * r1 is carried over.

    Afterwards every pair of children whose ``qsr_DC_degree`` is negative is
    handed to ``rotate_vector_till``, and the last entry of the node is
    increased until ``qsr_P_degree`` of each child against the node is no
    longer negative.

    :param tree: root of the sub-tree to shift
    :param deltaVec: direction of the shift
    :param deltaL: length of the shift
    :param wsChildrenDic: passed to ``get_children``
    :param word2ballDic: maps node name to its ball
    :param outputPath: when truthy, the ball of each shifted node is written
        with ``create_ball_file``
    :return: word2ballDic
    """
    # descendants first
    for kid in get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(
            kid, deltaVec, deltaL, wsChildrenDic=wsChildrenDic,
            word2ballDic=word2ballDic, outputPath=outputPath)

    # move the ball of this node
    l1, r1 = word2ballDic[tree][-2:]
    cos_angle = vec_cos(deltaVec, word2ballDic[tree][:-2])
    shifted_l = np.sqrt(l1 * l1 + deltaL * deltaL
                        + 2 * l1 * deltaL * cos_angle)
    shifted_centre = vec_norm(vec_point(word2ballDic[tree][:-2], l1)
                              + vec_point(deltaVec, deltaL))
    word2ballDic[tree] = list(shifted_centre) + [shifted_l, r1]

    # check every pair of children
    siblings = get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic)
    for ref_pos in range(len(siblings) - 1):
        for cur_pos in range(ref_pos + 1, len(siblings)):
            ref_name, cur_name = siblings[ref_pos], siblings[cur_pos]
            dcDelta = qsr_DC_degree(word2ballDic[ref_name],
                                    word2ballDic[cur_name])
            if dcDelta < 0:
                print(cur_name, ref_name, cur_pos, ref_pos)
                word2ballDic = rotate_vector_till(
                    cur_name, ref_name, word2ballDic=word2ballDic,
                    logFile='word2ball.log')

    # enlarge this ball until qsr_P_degree is non-negative for each child;
    # the extra margin doubles before every enlargement
    for kid in get_children(tree, wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])
            if not delta < 0:
                break
            gap *= 2
            word2ballDic[tree][-1] += -delta + gap

    if outputPath:
        create_ball_file(tree, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic
def training_P_by_name(childName, atreeName, addDim=[], wsChildrenDic=dict(),
                       word2vecDic=dict(), wscatCodeDic=dict(),
                       word2ballDic=dict(), sep='.', outputPath="",
                       logFile=None):
    """
    Build the ball of ``atreeName`` so that ``qsr_P_degree`` of the ball of
    ``childName`` against it is not negative.

    The parent ball is (re-)created with ``initialize_ball`` and then its
    last two entries (called L and R below) are replaced:

      * If both names share the same first ``sep``-separated component, the
        parent gets L = LeafL + LeafR + cgap and R = 2 * LeafR + 2 * cgap.
      * Otherwise L and R are derived from the angle between the two centre
        vectors (alpha) and from sin(beta) = LeafR / LeafL; see the inline
        comments.  While cos(alpha + beta) <= 0 the whole tree of the child
        is shifted with ``shift_whole_tree_of`` and the computation is
        repeated.

    Finally R of the parent is increased until ``qsr_P_degree`` is no longer
    negative.

    ``L0``, ``R0`` and ``cgap`` are not parameters; they are resolved from
    the enclosing (module) scope.

    NOTE(review): ``logFile`` defaults to None but is passed to ``open()``
    whenever a shift happens, so callers have to supply a path.

    :param childName: name of the child node; its ball must already be in
        ``word2ballDic``
    :param atreeName: name of the parent node whose ball is built
    :param addDim: passed to ``initialize_ball``
    :param wsChildrenDic: passed to ``shift_whole_tree_of``
    :param word2vecDic: passed to ``initialize_ball``
    :param wscatCodeDic: passed to ``initialize_ball``
    :param word2ballDic: maps node name to its ball
    :param sep: separator used to split the node names
    :param outputPath: when truthy, the parent ball is written with
        ``create_ball_file``
    :param logFile: file to which shift operations are appended
    :return: ``(ball of atreeName, word2ballDic)``
    """
    if childName.split(sep)[0] == atreeName.split(sep)[0]:
        # Same first name component: keep the freshly initialised centre of
        # the parent and size the ball directly from the child's L and R.
        BallLeaf = word2ballDic[childName]
        BallParent, word2ballDic = initialize_ball(atreeName, addDim=addDim,
                                                   L0=L0, R0=R0,
                                                   word2vecDic=word2vecDic,
                                                   wscatCodeDic=wscatCodeDic,
                                                   word2ballDic=word2ballDic,
                                                   outputPath=outputPath)
        LeafO, ParentO = BallLeaf[:-2], BallParent[:-2]
        LeafL, LeafR = BallLeaf[-2], BallLeaf[-1]
        ParentL, ParentR = LeafL + LeafR + cgap, LeafR + LeafR + cgap + cgap
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})
    else:
        # targetsin0 is the value R / L that a shift of the child aims at;
        # it is lowered by 10% after every shift.
        targetsin0 = 0.6
        while True:
            BallLeaf = word2ballDic[childName]
            # (re-)initialising overwrites word2ballDic[atreeName]
            BallParent, word2ballDic = initialize_ball(
                atreeName, addDim=addDim, L0=L0, R0=R0,
                word2vecDic=word2vecDic, wscatCodeDic=wscatCodeDic,
                word2ballDic=word2ballDic, outputPath=outputPath)
            LeafO, ParentO = [decimal.Decimal(ele) for ele in BallLeaf[:-2]], \
                [decimal.Decimal(ele) for ele in BallParent[:-2]]
            LeafL, LeafR = BallLeaf[-2], BallLeaf[-1]

            # beta: sin(beta) = R / L of the child ball.  The "1 - sin^2"
            # term is clamped at 0 before the square root is taken.
            sin_beta = BallLeaf[-1] / BallLeaf[-2]
            delta = 1 - sin_beta * sin_beta
            if delta < 0:
                delta = 0
            cos_beta = np.sqrt(delta)

            # alpha: angle between the centre vectors of child and parent
            cos_alpha = np.dot(
                LeafO, ParentO) / np.linalg.norm(LeafO) / np.linalg.norm(ParentO)
            delta = 1 - cos_alpha * cos_alpha
            if delta < 0:
                delta = 0
            sin_alpha = np.sqrt(delta)

            # begin alpha --> xalpha
            # xalpha / yalpha are used as sine / cosine of an extra angle
            # whose sine is sin(alpha) / 25; the sine addition formula below
            # yields the sine of (alpha + that extra angle), which then
            # replaces alpha.
            xalpha = sin_alpha / 25
            yalpha = np.sqrt(1 - xalpha * xalpha)
            sin_xalpha = xalpha * cos_alpha + yalpha * sin_alpha
            delta = 1 - sin_xalpha * sin_xalpha
            if delta < 0:
                delta = 0
            cos_xalpha = np.sqrt(delta)
            sin_alpha = sin_xalpha
            cos_alpha = cos_xalpha
            # end

            dOO = LeafL * decimal.Decimal(cos_beta)
            # cos(alpha + beta) via the cosine addition formula
            cos_alpha_beta = (
                decimal.Decimal(cos_beta) * decimal.Decimal(cos_alpha)
                - decimal.Decimal(sin_beta) * decimal.Decimal(sin_alpha))
            if cos_alpha_beta <= 0:
                # ParentL below would not be positive: shift the whole tree
                # of the child along its own centre vector so that its L
                # becomes R / targetsin0, then recompute everything.
                L, R = word2ballDic[childName][-2:]
                print('Shifting...', childName)
                LNew = R / decimal.Decimal(targetsin0)
                with open(logFile, 'a+') as wlog:
                    wlog.write(" ".join(
                        ["shifting", str(childName)]
                        + [str(ele) for ele in word2ballDic[childName][:-2]]
                        + [str(LNew - L)]))
                    wlog.write("\n")
                word2ballDic = shift_whole_tree_of(
                    childName, word2ballDic[childName][:-2], LNew - L,
                    wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic,
                    outputPath=outputPath)
                # the check result is only reported, not acted upon
                checkResult = check_DC_for_sibilings_in_one_family(childName)
                if checkResult:
                    print("check_DC_for_sibilings_in_one_family", childName,
                          checkResult)
                targetsin0 *= 0.9
            else:
                break

        # ParentL = LeafL * cos(beta) / cos(alpha + beta)
        ParentL = dOO / cos_alpha_beta
        assert ParentL > 0 and ParentL != np.inf
        # ParentR = ParentL * sin(alpha + beta) + 0.1
        ParentR = ParentL * (
            decimal.Decimal(sin_alpha) * decimal.Decimal(cos_beta)
            + decimal.Decimal(cos_alpha) * decimal.Decimal(sin_beta)) + decimal.Decimal(0.1)
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})

    # Enlarge R of the parent until qsr_P_degree is no longer negative.
    # count is incremented per round but not read anywhere in this function.
    count = 0
    while qsr_P_degree(word2ballDic[childName], word2ballDic[atreeName]) < 0:
        oldParentR, delta = ParentR, 10
        ParentR += decimal.Decimal(2) - qsr_P_degree(word2ballDic[childName],
                                                     word2ballDic[atreeName])
        # If the addition left ParentR unchanged, keep adding a step that
        # grows tenfold until the value actually moves.
        while oldParentR == ParentR:
            ParentR += delta
            delta *= 10
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})
        count += 1

    if outputPath:
        create_ball_file(atreeName, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return BallParent, word2ballDic
def training_DC_by_name(childrenNames, wsChildrenDic=None, word2ballDic=None,
                        outputPath=None, ordered=False, logFile=None):
    """
    Make the balls of all nodes in ``childrenNames`` pairwise disconnected.

    The nodes are visited in order (see ``ordered``).  For each reference
    node, every later node that does not satisfy ``qsr_DC`` against it is
    transformed with ``ratio_homothetic_DC_transform``; when R / L of that
    node equals 1, its whole tree is first shifted with
    ``shift_whole_tree_of``.  Every separation is appended to
    ``ball_generation_log``.

    At the end, for each node the ratio between its new and its initial
    second-to-last ball entry is applied to its descendants with
    ``homothetic_recursive_transform_of_decendents``.

    :param childrenNames: names of the sibling nodes
    :param wsChildrenDic: passed on to the tree-transforming helpers
    :param word2ballDic: maps node name to its ball
    :param outputPath: when truthy, changed balls are written with
        ``create_ball_file``
    :param ordered: if True, the first pass uses the order of
        ``childrenNames``; otherwise the nodes are sorted by the
        second-to-last entry of their balls.  After every reference node the
        order is re-sorted by that entry regardless of this flag.
    :param logFile: path of a log file to append to; logging is skipped when
        it is None or empty
    :return: word2ballDic
    """
    wsChildrenDic = {} if wsChildrenDic is None else wsChildrenDic
    word2ballDic = {} if word2ballDic is None else word2ballDic

    # second-to-last ball entry per node, now (dic) and at the start (dic0)
    dic = {name: word2ballDic[name][-2] for name in childrenNames}
    dic0 = copy.deepcopy(dic)

    if ordered:
        order = list(childrenNames)
    else:
        order = sorted(dic, key=dic.get)

    i = 0
    while i < len(order) - 1:
        refTreeName = order[i]
        for j in range(i + 1, len(order)):
            curTreeName = order[j]
            targetsin0 = 0.6
            while not qsr_DC(word2ballDic[curTreeName],
                             word2ballDic[refTreeName]):
                ball1 = word2ballDic[curTreeName]
                l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
                k = r1 / l1
                if k == 1:
                    # Shift the tree of curTreeName along its own centre.
                    # This used to act on the leftover loop variable ``tree``
                    # (always the last entry of childrenNames), i.e. on an
                    # unrelated node.
                    L, R = word2ballDic[curTreeName][-2:]
                    print('Shifting...', curTreeName)
                    LNew = R / decimal.Decimal(targetsin0)
                    if logFile:
                        with open(logFile, 'a+') as wlog:
                            wlog.write(" ".join(
                                ["shifting", str(curTreeName)]
                                + [str(ele) for ele
                                   in word2ballDic[curTreeName][:-2]]
                                + [str(LNew - L)]))
                            wlog.write("\n")
                    word2ballDic = shift_whole_tree_of(
                        curTreeName, word2ballDic[curTreeName][:-2], LNew - L,
                        wsChildrenDic=wsChildrenDic,
                        word2ballDic=word2ballDic, outputPath=outputPath)
                    # the check result is only reported, not acted upon
                    checkResult = check_DC_for_sibilings_in_one_family(
                        curTreeName)
                    if checkResult:
                        print("check_DC_for_sibilings_in_one_family",
                              curTreeName, checkResult)
                    targetsin0 *= 0.9

                ratio0, word2ballDic = ratio_homothetic_DC_transform(
                    curTreeName, refTreeName, wsChildrenDic=wsChildrenDic,
                    word2ballDic=word2ballDic, outputPath=outputPath,
                    logFile=logFile)
                ball_generation_log.append(
                    Log(key=curTreeName, operation=Operation.SEPERATE,
                        operation_args=[refTreeName],
                        vector=word2ballDic[curTreeName]))
                assert ratio0 != -1
                if outputPath:
                    create_ball_file(curTreeName, outputPath=outputPath,
                                     word2ballDic=word2ballDic)

        # the transformations changed the balls: refresh and re-sort
        for name in childrenNames:
            dic[name] = word2ballDic[name][-2]
        order = sorted(dic, key=dic.get)
        i += 1

    # debugging hook kept from the original code
    if "herd.n.02" in childrenNames and "gathering.n.01" in childrenNames:
        print('break')

    #####
    # homothetic transformation
    #####
    for child in childrenNames:
        ratio = word2ballDic[child][-2] / decimal.Decimal(dic0[child])
        word2ballDic = homothetic_recursive_transform_of_decendents(
            child, root=child, rate=ratio, wsChildrenDic=wsChildrenDic,
            word2ballDic=word2ballDic, outputPath=outputPath)
    return word2ballDic
def making_ball_contains(root, children, addDim=None, word2vecDic=None,
                         wsChildrenDic=None, wscatCodeDic=None,
                         word2ballDic=None, outputPath=None, logFile=None):
    """
    Build the ball of ``root`` from the parent balls that
    ``training_P_by_name`` produces for each of its children.

    With L and R the last two entries of such a parent ball, the final ball
    of ``root`` gets

        L = largest L seen,
        R = largest L - smallest (L - R) seen + cgap.

    If some parent ball has L - R <= 0, the whole tree of ``root`` is shifted
    by ``R - L`` along its centre vector with ``shift_whole_tree_of`` and all
    children are processed again.  The largest L and smallest L - R recorded
    before such a restart are kept.

    ``cgap`` is not a parameter; it is resolved from the enclosing (module)
    scope.

    :param root: name of the parent node
    :param children: names of the children of ``root``
    :param addDim: passed to ``training_P_by_name``
    :param word2vecDic: passed to ``training_P_by_name``
    :param wsChildrenDic: passed on to the tree-transforming helpers
    :param wscatCodeDic: passed to ``training_P_by_name``
    :param word2ballDic: maps node name to its ball
    :param outputPath: when truthy, the ball of ``root`` is written with
        ``create_ball_file``
    :param logFile: path of a log file; the shift performed in this function
        is only logged when it is set, and the value is passed unchanged to
        ``training_P_by_name``
    :return: word2ballDic
    :raises ValueError: if ``children`` is empty
    """
    addDim = [] if addDim is None else addDim
    word2vecDic = {} if word2vecDic is None else word2vecDic
    wsChildrenDic = {} if wsChildrenDic is None else wsChildrenDic
    wscatCodeDic = {} if wscatCodeDic is None else wscatCodeDic
    word2ballDic = {} if word2ballDic is None else word2ballDic

    maxL = -1          # -1 marks "nothing recorded yet"
    minL_R = None
    settled = False
    while not settled:
        settled = True
        for childName in children:
            # todo P == parent?
            pBall, word2ballDic = training_P_by_name(
                childName, root, addDim=addDim, wsChildrenDic=wsChildrenDic,
                word2vecDic=word2vecDic, wscatCodeDic=wscatCodeDic,
                word2ballDic=word2ballDic, outputPath=outputPath,
                logFile=logFile)
            assert pBall != -1

            l_minus_r = decimal.Decimal(pBall[-2]) - decimal.Decimal(pBall[-1])
            if maxL == -1:
                # initialize maxL, minL_R
                maxL, minL_R = pBall[-2], l_minus_r
            if maxL < pBall[-2]:
                maxL = pBall[-2]

            if l_minus_r <= 0:
                # L - R <= 0: shift the tree of root and start over
                print('Shifting...mbc', root)
                if logFile:
                    with open(logFile, 'a+') as wlog:
                        wlog.write(" ".join(
                            ["shifting", str(root)]
                            + [str(ele) for ele in word2ballDic[root][:-2]]
                            + [str(-l_minus_r)]))
                        wlog.write("\n")
                word2ballDic = shift_whole_tree_of(
                    root, word2ballDic[root][:-2], -l_minus_r,
                    wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic,
                    outputPath=outputPath)
                settled = False
                break
            elif l_minus_r < minL_R:
                minL_R = l_minus_r

    if minL_R is None:
        # no child was processed, so there is nothing to size the ball from
        raise ValueError(
            "making_ball_contains needs at least one child for " + str(root))

    word2ballDic[root] = word2ballDic[root][:-2] + [
        maxL, maxL - minL_R + cgap
    ]
    if outputPath:
        create_ball_file(root, outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic