def saveDemo(self):
    """Save a random subset of the medical thesaurus as a demo JSON file.

    Builds the thesaurus with ``self.genMT()``, draws at most
    ``self.DEMO_TERM_NUM`` of its keys at random (without replacement) and
    writes the resulting sub-mapping to ``self.SAVE_MT_DEMO_JSON`` through
    the JSON save function.
    """
    medicalThesaurus = self.genMT()
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the sample size to the thesaurus size.
    sampleSize = min(self.DEMO_TERM_NUM, len(medicalThesaurus))
    sampledAUIs = random.sample(list(medicalThesaurus), sampleSize)
    smallMT = {AUI: medicalThesaurus[AUI] for AUI in sampledAUIs}
    getSaveFunc(JSON_FILE_FORMAT)(smallMT, self.SAVE_MT_DEMO_JSON)
def mergeListJson(saveJson, folder):
    """Concatenate the numbered list-JSON files of a folder into one JSON file.

    Args:
        saveJson (str): path of the merged JSON file to write
        folder (str): folder searched (via getFileList) for '.json' files;
            the second component returned by splitPath for each file must
            be parseable as an int, which defines the merge order

    An empty folder produces an empty merged list.
    """
    fileList = getFileList(folder, lambda fpath: splitPath(fpath)[2] == '.json')
    # Sort by the numeric file name; the path is the tie-breaker, which is
    # the same ordering as sorting (number, path) tuples.
    orderedFiles = sorted(
        fileList, key=lambda fpath: (int(splitPath(fpath)[1]), fpath)
    )
    retList = []
    for fpath in orderedFiles:
        # Context manager so each part file is closed right after reading.
        with open(fpath) as fin:
            retList.extend(json.load(fin))
    getSaveFunc(JSON_FILE_FORMAT)(retList, saveJson)
def update(API, inputJson, outputJson, needUpdateFunc, c=TranslateConfig()):
    """Re-translate selected entries of an existing translation result in place.

    Args:
        API: passed through unchanged to translate()
        inputJson (str): path of the JSON list of source terms
        outputJson (str): path of the JSON list of translated terms; it must
            have the same length as the input list and is overwritten with
            the updated list
        needUpdateFunc (callable): called as needUpdateFunc(inTerm, outTerm);
            a truthy result marks that position for re-translation
        c: configuration passed through to translate().
            NOTE(review): the default TranslateConfig() is created once, at
            definition time, and shared by all calls that omit ``c``.

    The terms to re-translate and their new translations go through two
    temporary files under TEMP_PATH named after the output file.
    """
    with open(inputJson) as fin:
        inputList = json.load(fin)
    with open(outputJson) as fin:
        outputList = json.load(fin)
    assert len(inputList) == len(outputList), \
        'input and output lists differ in length'

    # [(rank, eng), ...] for every position that needs a new translation
    posEngList = [
        (rank, inTerm)
        for rank, (inTerm, outTerm) in enumerate(zip(inputList, outputList))
        if needUpdateFunc(inTerm, outTerm)
    ]

    ufname = splitPath(outputJson)[1]
    updateInJson = TEMP_PATH + '/{}-update-input.json'.format(ufname)
    updateOutJson = TEMP_PATH + '/{}-update-output.json'.format(ufname)
    getSaveFunc(JSON_FILE_FORMAT)(
        [engTerm for _, engTerm in posEngList], updateInJson
    )
    # presumably translate() writes one translated term per input term to
    # updateOutJson, in the same order -- verify against translate()
    translate(API, updateInJson, updateOutJson, trace=False, c=c)

    with open(updateOutJson) as fin:
        updateOutList = json.load(fin)
    for ui, uTerm in enumerate(updateOutList):
        rank = posEngList[ui][0]
        print('{} -> {}'.format(outputList[rank], uTerm))  # debug
        outputList[rank] = uTerm
    getSaveFunc(JSON_FILE_FORMAT)(outputList, outputJson)
def divideListJson(inputJson, sliceLen, folder):
    """Split the list stored in a JSON file into slices saved as JSON files in folder.

    The slices are written as '1.json', '2.json', ... in list order.

    Args:
        inputJson (str): path of the JSON file containing the list to split
        sliceLen (int): length of each slice (the last one may be shorter)
        folder (str): path of the folder receiving the slice files

    Returns:
        list: [jsonPath1, jsonPath2, ...]
    """
    # Context manager so the input file is closed as soon as it is parsed.
    with open(inputJson) as fin:
        ll = json.load(fin)
    saveJsonFunc = getSaveFunc(JSON_FILE_FORMAT)
    savePathList = []
    # File names are 1-based counters, hence enumerate(..., 1).
    for count, start in enumerate(range(0, len(ll), sliceLen), 1):
        savepath = folder + os.sep + str(count) + '.json'
        saveJsonFunc(ll[start:start + sliceLen], savepath)
        savePathList.append(savepath)
    return savePathList
# print 'counting charactersNum' # print 'characters number:', countCharactersNum([termDict['STR'] for termDict in dataList]) # print 'examining AUI...' # examineAUI(dataList) # print 'done' # print 'counting CUI' # print 'CUI Number:', countCUI(dataList) # # print 'counting isPreffer' # prefNum, prefCharacNum = countIsPref(dataList) # print 'isPreffer number:', prefNum # print 'characters number:', prefCharacNum print('getting unique English terms') AUIList, termList = getEngTerms(dataList) assert len(AUIList) == len(termList) print('total unique English terms:', len(AUIList)) print('counting characters') charactersNum = countCharactersNum(termList) print('character number:', charactersNum) print('saving as json') saveAsJson = getSaveFunc(JSON_FILE_FORMAT) saveAsJson(AUIList, DATA_PATH + '/umlsMT/translate/AUI.json') saveAsJson(termList, DATA_PATH + '/umlsMT/translate/termENG.json') print('done')