def testing_whole_family(outputPath=None, wsChildrenDic=None, word2ballDic=None, outputBallFile=None):
    """
    Check the balls stored under ``outputPath`` and, if every check passes,
    merge them into a single embedding file.

    The balls are loaded with ``load_balls``. Each child of ``'*root*'`` is
    checked with ``check_P_for_child_parent_in_one_family``, and
    ``check_DC_for_sibilings_in_one_family`` is run with ``'*root*'`` as root.
    Both failure lists are printed. ``merge_balls_into_file`` is called only
    when both lists are empty; otherwise a failure message is printed.

    :param outputPath: directory of ball files, passed to ``load_balls``, the
        P check (as ``ballPath``) and ``merge_balls_into_file`` (as ``ipath``)
    :param wsChildrenDic: dictionary handed to ``get_children`` and to both
        checks (presumably word sense -> children; confirm against caller).
        A fresh empty dict is used when omitted.
    :param word2ballDic: dictionary handed to ``load_balls``; the dictionary
        returned by ``load_balls`` is the one used for the checks. A fresh
        empty dict is used when omitted, so no state is shared between calls.
    :param outputBallFile: passed as ``outfile`` to ``merge_balls_into_file``
    :return: None
    """
    # None sentinels replace the former ``dict()`` defaults, which were
    # created once at definition time and shared by every call.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    print("checking whether the tree structure is perfectly encoded in nball embeddings...\n")
    failed_P, failed_DC = [], []
    maxsize, mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)

    for froot in get_children('*root*', wsChildrenDic=wsChildrenDic):
        failed_P += check_P_for_child_parent_in_one_family(froot,
                                                           wsChildrenDic=wsChildrenDic,
                                                           word2ballDic=word2ballDic,
                                                           ballPath=outputPath)

    # The sibling check is rooted at '*root*' and does not depend on the loop
    # variable, so it runs once.
    failed_DC += check_DC_for_sibilings_in_one_family(root='*root*',
                                                      wsChildrenDic=wsChildrenDic,
                                                      word2ballDic=word2ballDic)

    print("failed families with P", failed_P)
    print("failed families with DC", failed_DC)

    if not failed_P and not failed_DC:
        print("the tree structure is perfectly encoded in nball embeddings.\n")
        print("generating nball embedding file...\n")
        merge_balls_into_file(ipath=outputPath, outfile=outputBallFile)
    else:
        print("the tree structure is NOT perfectly encoded in nball embeddings.\n")
        print("try again, or contact the author")
def train_word2ball(root="", outputPath='', logFile='', wsChildrenDic=None,
                    word2ballDic=None, word2vecDic=None, outputPathBack=None,
                    wscatCodeDic=None, outputBallFile=None):
    """
    Run the whole pipeline: train, optionally back up, post-process, and test.

    Steps, in order:
      1. ``training_all_families`` trains the families below ``root``.
      2. If ``outputPathBack`` is truthy, ``outputPath`` is copied there with
         ``copy_tree``.
      3. The balls are reloaded with ``load_balls`` and ``fix_dim`` is applied
         in place (``bPath`` and ``outputPath`` are the same directory).
      4. ``make_DC_for_first_level_children`` is run with ``'entity.n.01'`` as
         the first child.
      5. ``testing_whole_family`` checks the result and writes
         ``outputBallFile`` on success.

    :param root: root node whose families are trained
    :param outputPath: directory where ball files are written and read
    :param logFile: log file path forwarded to the training helpers
    :param wsChildrenDic: dictionary forwarded to the helpers (presumably
        word sense -> children; confirm against caller); fresh dict if omitted
    :param word2ballDic: dictionary forwarded to training and ``load_balls``;
        fresh dict if omitted
    :param word2vecDic: dictionary forwarded to training (presumably word ->
        pre-trained vector; confirm against caller); fresh dict if omitted
    :param outputPathBack: optional backup directory for the trained balls
    :param wscatCodeDic: dictionary forwarded to training; fresh dict if omitted
    :param outputBallFile: output file forwarded to ``testing_whole_family``
    :return: None
    """
    # None sentinels replace the former ``dict()`` defaults, which were shared
    # across calls. A dictionary passed in by the caller is used as is.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}

    training_all_families(root=root, wsChildrenDic=wsChildrenDic, word2vecDic=word2vecDic,
                          wscatCodeDic=wscatCodeDic, word2ballDic=word2ballDic,
                          outputPath=outputPath, logFile=logFile)

    if outputPathBack:
        copy_tree(outputPath, outputPathBack)

    maxsize, mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)
    fix_dim(maxsize, mindim, bPath=outputPath, outputPath=outputPath)

    make_DC_for_first_level_children(root=root, firstChild='entity.n.01',
                                     wsChildrenDic=wsChildrenDic, word2ballDic=word2ballDic,
                                     outputPath=outputPath, maxsize=maxsize, mindim=mindim,
                                     logFile=logFile)

    # word2ballDic is deliberately not forwarded here (as in the original call):
    # testing_whole_family loads the balls from outputPath itself.
    testing_whole_family(outputPath=outputPath, wsChildrenDic=wsChildrenDic,
                         outputBallFile=outputBallFile)
def training_all_families(root="*root*", wsChildrenDic=None, word2vecDic=None, wscatCodeDic=None,
                          word2ballDic=None, outputPath=None, logFile=None, checking=False):
    """
    Train one family per child of ``root`` and return the ball dictionary.

    The children of ``root`` are sorted by the dot product of their vector
    with the vector of ``'entity.n.01'``. They are then processed from the end
    of the list: each popped child is logged to ``logFile`` and trained with
    ``training_one_family``, and the remaining children are re-sorted by dot
    product with the child just trained.

    :param root: node whose children are trained as separate families
    :param wsChildrenDic: dictionary forwarded to ``get_children`` and the
        helpers (presumably word sense -> children; confirm against caller);
        fresh dict if omitted
    :param word2vecDic: dictionary forwarded to ``get_word2vector`` and
        ``training_one_family``; fresh dict if omitted
    :param wscatCodeDic: dictionary forwarded to ``training_one_family``;
        fresh dict if omitted
    :param word2ballDic: dictionary forwarded to ``training_one_family`` and
        replaced by its return value; fresh dict if omitted
    :param outputPath: directory forwarded to ``training_one_family`` and, when
        checking, to ``load_balls`` and the P check
    :param logFile: path of the log file; it is truncated at the start and one
        line (child, offset values, UTC timestamp) is appended per family
    :param checking: when true, reload the balls after training, run the P and
        DC checks for every child of ``root`` and print the failures
    :return: the ball dictionary after training (reloaded by ``load_balls``
        when ``checking`` is true)
    """
    # None sentinels replace the former ``dict()`` defaults, which were shared
    # across calls. A dictionary passed in by the caller is used as is.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    children = get_children(root, wsChildrenDic=wsChildrenDic)
    child0 = 'entity.n.01'
    children = sorted(children,
                      key=lambda ele: np.dot(get_word2vector(child0, word2vecDic=word2vecDic),
                                             get_word2vector(ele, word2vecDic=word2vecDic)))
    print(children)

    # np.log(0) is -inf and int(-inf) raises OverflowError, so an empty child
    # list falls back to 0; the training loop below is then simply skipped.
    N = int(np.ceil(np.log(len(children)))) if children else 0

    # Create/truncate the log file and close the handle right away.
    with open(logFile, 'w+'):
        pass

    # N, k and the module-level DIM do not change inside the loop, so the
    # offset vector is the same for every family and is computed once.
    k = 512
    addDim0 = list(bin(N))[2:][:DIM]
    if len(addDim0) < DIM:
        addDim0 += [0] * (DIM - len(addDim0))
    # Map each binary digit (or padding 0) to -k / +k.
    addDim = [(int(ele) * 2 - 1) * k for ele in addDim0]

    while children:
        child = children.pop()
        print("***", child)

        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
        with open(logFile, 'a+') as wlog:
            wlog.write(" ".join([str(ele) for ele in [child] + addDim + [stamp]]))
            wlog.write("\n")

        # Each call gets its own copy of the offset list, as it did when the
        # list was rebuilt on every iteration.
        word2ballDic = training_one_family(root=child, addDim=list(addDim),
                                           wsChildrenDic=wsChildrenDic,
                                           word2vecDic=word2vecDic,
                                           wscatCodeDic=wscatCodeDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath, logFile=logFile)

        # Re-order the remaining children relative to the family just trained;
        # the one with the largest dot product is popped next.
        children = sorted(children,
                          key=lambda ele: np.dot(get_word2vector(child, word2vecDic=word2vecDic),
                                                 get_word2vector(ele, word2vecDic=word2vecDic)))

    print("finished training of all families\n")

    if checking:
        print("checking each family\n")
        maxsize, mindim, word2ballDic = load_balls(ipath=outputPath, word2ballDic=word2ballDic)
        failed_P, failed_DC = [], []
        # wsChildrenDic is passed explicitly, like every other get_children call.
        for child in get_children(root, wsChildrenDic=wsChildrenDic):
            failed_P += check_P_for_child_parent_in_one_family(child,
                                                               word2ballDic=word2ballDic,
                                                               wsChildrenDic=wsChildrenDic,
                                                               ballPath=outputPath)
            failed_DC += check_DC_for_sibilings_in_one_family(root=child,
                                                              word2ballDic=word2ballDic,
                                                              wsChildrenDic=wsChildrenDic)
        print("failed families with P", failed_P)
        print("failed families with DC", failed_DC)

    return word2ballDic