def training_all_families(root="*root*", wsChildrenDic=dict(), word2vecDic=dict(), wscatCodeDic=dict(),
                          word2ballDic=dict(),
                          outputPath=None, logFile=None, checking = False):
    """
    Train the balls of every family below ``root``, one first-level child at a time.

    The direct children of ``root`` are sorted by the dot product of their vector
    with the vector of 'entity.n.01'.  Families are popped from the end of that list
    (largest dot product first); after each family has been trained, the remaining
    ones are re-sorted by their dot product with the family just trained.

    :param root: node whose direct children are each trained as one family
    :param wsChildrenDic: passed to get_children and to the training / checking helpers
        (presumably maps a node name to its children -- confirm against get_children)
    :param word2vecDic: passed to get_word2vector to look up the vectors used for ordering
    :param wscatCodeDic: forwarded unchanged to training_one_family
    :param word2ballDic: threaded through every training_one_family call; the final value is returned
    :param outputPath: forwarded to training_one_family; also used as the ball location
        for load_balls and the P check when ``checking`` is true
    :param logFile: path of the log file. It is truncated first, then one line per family
        is appended (family name, the addDim values, a UTC timestamp). It is opened
        unconditionally, so a path has to be supplied.
    :param checking: when true, reload the balls and run the P and DC checks on every family
    :return: word2ballDic as returned by the last helper call
    """
    # DIM is only read here, so no ``global`` declaration is required.
    children = get_children(root, wsChildrenDic=wsChildrenDic)
    child0 = 'entity.n.01'
    children = sorted(children, key=lambda ele: np.dot(get_word2vector(child0, word2vecDic=word2vecDic),
                                                       get_word2vector(ele, word2vecDic=word2vecDic)))
    print(children)
    # np.log(0) is -inf and int(-inf) raises OverflowError; N is only used inside the
    # loop below, which does not run for an empty child list.
    N = int(np.ceil(np.log(len(children)))) if children else 0
    # Truncate (or create) the log file; the context manager closes the handle at once
    # instead of leaving it to the garbage collector.
    with open(logFile, 'w+'):
        pass
    while children:
        child = children.pop()
        k = 512
        # Binary digits of N (at most DIM of them), right-padded with zeros to DIM entries,
        # then mapped 1 -> +k and 0 -> -k.
        # NOTE(review): N is never changed inside this loop, so every family receives the
        # same addDim -- confirm this is intended.
        addDim0 = list(bin(N))[2:][:DIM]
        if len(addDim0) < DIM:
            addDim0 += [0] * (DIM - len(addDim0))
        addDim = [(int(ele) * 2 - 1) * k for ele in addDim0]
        print("***", child)
        with open(logFile, 'a+') as wlog:
            wlog.write(" ".join([str(ele) for ele in [child]
                                    + addDim
                                    + [time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())]]))
            wlog.write("\n")
        word2ballDic = training_one_family(root=child, addDim=addDim, wsChildrenDic=wsChildrenDic,
                                           word2vecDic=word2vecDic, wscatCodeDic=wscatCodeDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath, logFile=logFile)
        # Re-order the remaining families relative to the one just trained.
        children = sorted(children, key=lambda ele: np.dot(get_word2vector(child, word2vecDic=word2vecDic),
                                                           get_word2vector(ele, word2vecDic=word2vecDic)))
    print("finished training of all families\n")

    if checking:
        print("checking each family\n")
        maxsize, mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)

        failed_P, failed_DC = [], []

        # Use the same children dictionary as the training pass above; the original
        # call omitted it and so fell back to get_children's default.
        for child in get_children(root, wsChildrenDic=wsChildrenDic):
            failed_P += check_P_for_child_parent_in_one_family(child, word2ballDic=word2ballDic,
                                                               wsChildrenDic=wsChildrenDic, ballPath=outputPath)
            failed_DC += check_DC_for_sibilings_in_one_family(root=child, word2ballDic=word2ballDic,
                                                              wsChildrenDic=wsChildrenDic)
        print("failed families with P", failed_P)
        print("failed families with DC", failed_DC)
    return word2ballDic
def testing_whole_family(outputPath=None, wsChildrenDic=dict(), word2ballDic=dict(), outputBallFile=None):
    """
    Check the stored balls against the tree and, if nothing fails, write the embedding file.

    The balls are loaded with load_balls, the P check is run on every child of
    '*root*', and the DC check is run once starting at '*root*'.  Both result lists
    are printed.  merge_balls_into_file is called only when both lists are empty.

    :param outputPath: location of the balls (load_balls ``ipath``, P-check ``ballPath``,
        merge_balls_into_file ``ipath``)
    :param wsChildrenDic: passed to get_children and to both check helpers
    :param word2ballDic: passed to load_balls; the dictionary it returns is used for the checks
    :param outputBallFile: passed to merge_balls_into_file as ``outfile``
    :return: None
    """
    print("checking whether the tree structure is perfectly encoded in nball embeddings...\n")
    # Only the third element returned by load_balls is used here.
    _maxsize, _mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)

    failed_P = []
    for familyRoot in get_children('*root*', wsChildrenDic=wsChildrenDic):
        failed_P.extend(
            check_P_for_child_parent_in_one_family(familyRoot,
                                                   wsChildrenDic=wsChildrenDic,
                                                   word2ballDic=word2ballDic,
                                                   ballPath=outputPath))

    failed_DC = []
    failed_DC.extend(
        check_DC_for_sibilings_in_one_family(root='*root*',
                                             wsChildrenDic=wsChildrenDic,
                                             word2ballDic=word2ballDic))

    print("failed families with P", failed_P)
    print("failed families with DC", failed_DC)

    if failed_P or failed_DC:
        print("the tree structure is NOT perfectly encoded in nball embeddings.\n")
        print("try again, or contact the author")
        return

    print("the tree structure is perfectly encoded in nball embeddings.\n")
    print("generating nball embedding file...\n")
    merge_balls_into_file(ipath=outputPath, outfile=outputBallFile)
def check_DC_for_sibilings_in_one_family(root="*root*", wsChildrenDic=dict(), word2ballDic=dict()):
    """
    Collect every pair of siblings below ``root`` whose balls fail the qsr_DC test.

    The tree is walked with an explicit stack.  For each visited node, every
    unordered pair of its children is tested once; a failing pair is printed and
    recorded.

    :param root: node at which the walk starts
    :param wsChildrenDic: passed to get_children
    :param word2ballDic: maps a node name to the ball handed to qsr_DC
    :return: list of (child_i, child_j) tuples that failed; empty when none failed
    """
    violations = []
    pending = [root]
    while pending:
        node = pending.pop()
        siblings = get_children(node, wsChildrenDic=wsChildrenDic)
        pending.extend(siblings)
        count = len(siblings)
        # With fewer than two siblings both ranges are empty, so nothing is tested.
        for first in range(count - 1):
            for second in range(first + 1, count):
                left, right = siblings[first], siblings[second]
                if qsr_DC(word2ballDic[left], word2ballDic[right]):
                    continue
                print(left, right, 'violates condition 4')
                violations.append((left, right))
    return violations
def check_P_for_child_parent_in_one_family(root=None, wsChildrenDic=dict(), word2ballDic=dict(), ballPath=""):
    """
    Check the qsr_P relation between every child and its parent below ``root``.

    The walk stops at the first failing pair: diagnostics are printed and ``[root]``
    is returned.  The relation itself is evaluated on the balls in ``word2ballDic``;
    the printed diagnostics use the balls read from ``ballPath``.

    :param root: node at which the walk starts; also the value reported on failure
    :param wsChildrenDic: passed to get_children
    :param word2ballDic: maps a node name to the ball handed to qsr_P
    :param ballPath: passed to get_ball_from_file for every visited parent and child
    :return: ``[root]`` on the first failure, otherwise ``[]``
    """
    stack = [root]
    while stack:
        parent = stack.pop()
        # Read for every node, although it is only used in the failure report below.
        parentBall = get_ball_from_file(parent, ballPath=ballPath)
        kids = get_children(parent, wsChildrenDic=wsChildrenDic)
        stack.extend(kids)
        for kid in kids:
            kidBall = get_ball_from_file(kid, ballPath=ballPath)
            if qsr_P(word2ballDic[kid], word2ballDic[parent]):
                continue
            print(kid, parent, 'violates condition 3')
            centerGap = dis_between_ball_centers(kidBall, parentBall)
            print('dis:', centerGap)
            print('r1', kidBall[-1])
            print('R', parentBall[-1])
            print('shall >=0', parentBall[-1] - centerGap - kidBall[-1])
            return [root]
    return []
def make_DC_for_first_level_children(root="*root*",
                                     firstChild='entity.n.01',
                                     wsChildrenDic=dict(),
                                     outputPath='',
                                     maxsize=0,
                                     mindim=0,
                                     word2ballDic=dict(),
                                     logFile=None):
    """
    Run training_DC_by_name on the direct children of ``root``, with ``firstChild`` first.

    :param root: node whose direct children are processed
    :param firstChild: child moved to the front of the list; ValueError is raised by
        ``list.remove`` when it is not among the children
    :param wsChildrenDic: passed to get_children and training_DC_by_name
    :param outputPath: forwarded to training_DC_by_name
    :param maxsize: accepted but not used in this function
    :param mindim: accepted but not used in this function
    :param word2ballDic: forwarded to training_DC_by_name
    :param logFile: forwarded to training_DC_by_name
    :return: whatever training_DC_by_name returns
    """
    topLevel = get_children(root, wsChildrenDic=wsChildrenDic)
    # The list returned by get_children is reordered in place.
    topLevel.remove(firstChild)
    topLevel.insert(0, firstChild)
    print('updating first level children...')
    return training_DC_by_name(topLevel,
                               outputPath=outputPath,
                               wsChildrenDic=wsChildrenDic,
                               word2ballDic=word2ballDic,
                               ordered=True,
                               logFile=logFile)
def training_one_family(treeStruc=None,root=None, addDim=[], wsChildrenDic = dict(), word2vecDic=dict(),
                        wscatCodeDic=dict(),
                        word2ballDic = dict(), outputPath=None, logFile=None):
    """
    Recursively build the balls of the subtree rooted at ``root``, children before parent.

    A node without children is handed to initialize_ball.  Otherwise every child
    subtree is trained first, the children are passed to training_DC_by_name when
    there is more than one, and finally making_ball_contains is called for ``root``
    and its children.

    :param treeStruc: optional mapping; when truthy, children are read as ``treeStruc[root]``
        instead of calling get_children
    :param root: node whose subtree is trained
    :param addDim: forwarded to initialize_ball and making_ball_contains
    :param wsChildrenDic: passed to get_children and the helper calls
    :param word2vecDic: forwarded to initialize_ball and making_ball_contains
    :param wscatCodeDic: forwarded to initialize_ball and making_ball_contains
    :param word2ballDic: threaded through every helper call; the latest value is returned
    :param outputPath: forwarded to the helper calls
    :param logFile: forwarded to training_DC_by_name and making_ball_contains
    :return: the updated word2ballDic
    """
    children = treeStruc[root] if treeStruc else get_children(root, wsChildrenDic=wsChildrenDic)

    if len(children) == 0:
        # Leaf: L0 and R0 are read from module scope.
        _, word2ballDic = initialize_ball(root, addDim=addDim, L0=L0, R0=R0,
                                          word2vecDic=word2vecDic, word2ballDic=word2ballDic,
                                          wscatCodeDic=wscatCodeDic,
                                          outputPath=outputPath)
        return word2ballDic

    for kid in children:
        word2ballDic = training_one_family(treeStruc=treeStruc, root=kid, addDim=addDim,
                                           wsChildrenDic=wsChildrenDic,
                                           word2vecDic=word2vecDic, wscatCodeDic=wscatCodeDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath, logFile=logFile)

    # Siblings are handled by training_DC_by_name only when there are at least two.
    if len(children) > 1:
        word2ballDic = training_DC_by_name(children, wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic,
                                           outputPath=outputPath, logFile=logFile)

    # The root ball is processed last, after all of its children.
    return making_ball_contains(root, children, addDim=addDim, word2vecDic=word2vecDic,
                                wsChildrenDic=wsChildrenDic, wscatCodeDic=wscatCodeDic,
                                word2ballDic=word2ballDic, outputPath=outputPath, logFile=logFile)
# Example #7
# 0
def homothetic_recursive_transform_of_decendents(tree,
                                                 root=None,
                                                 rate=None,
                                                 wsChildrenDic=dict(),
                                                 word2ballDic=dict(),
                                                 outputPath=None):
    """
    Scale the last two entries of every ball below ``root`` by ``rate``, children first.

    Nothing is done when ``rate`` equals 1.  Otherwise, for each node:

    1. all of its children are transformed recursively;
    2. if the node is ``root`` itself, the function returns here without scaling it;
    3. entry ``[-2]`` of the node's ball (converted to ``decimal.Decimal``) is
       multiplied by ``rate`` and entry ``[-1]`` is recomputed from the old values
       (presumably centre-vector length and radius -- confirm);
    4. every pair of children with a negative qsr_DC_degree is passed to rotate_vector_till;
    5. entry ``[-1]`` of the node's ball is increased until qsr_P_degree of every
       child against the node is non-negative.

    Ball files are written with create_ball_file whenever ``outputPath`` is truthy.

    :param tree: node currently being transformed
    :param root: top node of the transformation; it is not scaled itself
    :param rate: scaling factor; it is multiplied with a ``decimal.Decimal`` value
    :param wsChildrenDic: passed to get_children
    :param word2ballDic: maps node names to balls; updated in place and returned
    :param outputPath: when truthy, passed to create_ball_file for the updated balls
    :return: the updated word2ballDic
    """
    if rate == 1:
        return word2ballDic

    for descendant in get_children(tree,
                                   wsChildrenDic=wsChildrenDic,
                                   word2ballDic=word2ballDic):
        word2ballDic = homothetic_recursive_transform_of_decendents(
            descendant,
            root=root,
            rate=rate,
            word2ballDic=word2ballDic,
            wsChildrenDic=wsChildrenDic,
            outputPath=outputPath)

    if tree == root:
        return word2ballDic

    # Scale entry [-2] and recompute entry [-1] from the unscaled values.
    oldLen = decimal.Decimal(word2ballDic[tree][-2])
    word2ballDic[tree][-2] = oldLen * rate

    assert word2ballDic[tree][-2] != np.inf and word2ballDic[tree][-2] >= 0

    word2ballDic[tree][-1] = oldLen * rate - (oldLen - word2ballDic[tree][-1]) * rate
    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)

    # Rotate any pair of children whose DC degree is negative.
    siblings = get_children(tree,
                            wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic)
    for first in range(len(siblings) - 1):
        for second in range(first + 1, len(siblings)):
            dcDelta = qsr_DC_degree(word2ballDic[siblings[first]],
                                    word2ballDic[siblings[second]])
            if dcDelta < 0:
                print(siblings[second], siblings[first], second, first)
                word2ballDic = rotate_vector_till(
                    siblings[second],
                    siblings[first],
                    word2ballDic=word2ballDic,
                    logFile='word2ball.log')

    if outputPath:
        for kid in get_children(tree,
                                wsChildrenDic=wsChildrenDic,
                                word2ballDic=word2ballDic):
            create_ball_file(kid,
                             outputPath=outputPath,
                             word2ballDic=word2ballDic)

    # Grow entry [-1] of this ball until every child passes the P-degree test;
    # the extra margin starts at 1 and is multiplied by 10 after each attempt.
    for kid in get_children(tree,
                            wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        gap = 1
        delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])
        while delta < 0:
            print('delta:', delta)
            word2ballDic[tree][-1] += -delta + gap
            gap *= 10
            delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])

    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic
# Example #8
# 0
def shift_whole_tree_of(tree,
                        deltaVec,
                        deltaL,
                        wsChildrenDic=dict(),
                        word2ballDic=dict(),
                        outputPath=None):
    """
    Shift the ball of ``tree`` and of all its descendants by ``deltaVec`` / ``deltaL``.

    Processing order for each node:

    1. every child subtree is shifted recursively;
    2. the node's ball is rebuilt: entry ``[-2]`` becomes
       ``sqrt(l1^2 + deltaL^2 + 2*l1*deltaL*vec_cos(deltaVec, ball[:-2]))`` with
       ``l1`` the old entry ``[-2]``, the leading entries become
       ``vec_norm(vec_point(ball[:-2], l1) + vec_point(deltaVec, deltaL))``, and
       entry ``[-1]`` is kept;
    3. every pair of children with a negative qsr_DC_degree is passed to rotate_vector_till;
    4. entry ``[-1]`` of the node's ball is increased until qsr_P_degree of every
       child against the node is non-negative;
    5. the node's ball file is written when ``outputPath`` is truthy.

    :param tree: node whose subtree is shifted
    :param deltaVec: vector passed to vec_cos and vec_point
    :param deltaL: scalar combined with ``deltaVec`` via vec_point
    :param wsChildrenDic: passed to get_children
    :param word2ballDic: maps node names to balls; updated and returned
    :param outputPath: when truthy, passed to create_ball_file
    :return: the updated word2ballDic
    """
    for kid in get_children(tree,
                            wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(kid,
                                           deltaVec,
                                           deltaL,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)

    # Rebuild this node's ball from its old last two entries and the shift.
    centerLen, radius = word2ballDic[tree][-2:]
    cosine = vec_cos(deltaVec, word2ballDic[tree][:-2])
    shiftedLen = np.sqrt(centerLen * centerLen + deltaL * deltaL +
                         2 * centerLen * deltaL * cosine)
    scaledCenter = vec_point(word2ballDic[tree][:-2], centerLen)
    scaledShift = vec_point(deltaVec, deltaL)
    shiftedDirection = vec_norm(scaledCenter + scaledShift)
    word2ballDic[tree] = list(shiftedDirection) + [shiftedLen, radius]

    # Rotate any pair of children whose DC degree is negative.
    siblings = get_children(tree,
                            wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic)
    for first in range(len(siblings) - 1):
        for second in range(first + 1, len(siblings)):
            dcDelta = qsr_DC_degree(word2ballDic[siblings[first]],
                                    word2ballDic[siblings[second]])
            if dcDelta < 0:
                print(siblings[second], siblings[first], second, first)
                word2ballDic = rotate_vector_till(siblings[second],
                                                  siblings[first],
                                                  word2ballDic=word2ballDic,
                                                  logFile='word2ball.log')

    # Grow entry [-1] of this ball until every child passes the P-degree test;
    # the extra margin is doubled before each enlargement.
    for kid in get_children(tree,
                            wsChildrenDic=wsChildrenDic,
                            word2ballDic=word2ballDic):
        gap = 1
        delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])
        while delta < 0:
            gap *= 2
            word2ballDic[tree][-1] += -delta + gap
            delta = qsr_P_degree(word2ballDic[kid], word2ballDic[tree])

    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic