def translate(API, inputJson, outputJson, inputDividedFolder=None, outputDividedFolder=None, trace=True, c=TranslateConfig()):
    """Translate a list-JSON file slice by slice and merge the result.

    Pipeline: divide ``inputJson`` into slice files, run ``_translate`` over
    the slices, merge the translated slices into ``outputJson``, then call
    ``check(inputJson, outputJson)``.

    Args:
        API: Passed through to ``_translate``; also embedded in the names of
            the default working folders.
        inputJson: Path of the JSON file to translate.
        outputJson: Path the merged result is written to.
        inputDividedFolder: Folder holding the input slices. When ``None`` it
            is ``TEMP_PATH + '/<splitPath(inputJson)[1]>-<API>'``.
        outputDividedFolder: Folder receiving the translated slices. When
            ``None`` it is ``TEMP_PATH + '/<splitPath(outputJson)[1]>-<API>'``.
        trace: If false, both working folders are deleted at the end.
        c: Configuration object; ``c.SLICE_LENGTH`` is handed to
            ``divideListJson`` and ``c`` itself to ``_translate``.
    """

    def _is_json(fpath):
        return splitPath(fpath)[2] == '.json'

    if inputDividedFolder is None:
        in_name = splitPath(inputJson)[1]
        inputDividedFolder = TEMP_PATH + '/{}-{}'.format(in_name, API)
    if outputDividedFolder is None:
        out_name = splitPath(outputJson)[1]
        outputDividedFolder = TEMP_PATH + '/{}-{}'.format(out_name, API)

    # Divide. An input folder that already exists is used as-is; the input
    # is only divided when the folder has to be created.
    already_divided = os.path.exists(inputDividedFolder)
    if not already_divided:
        os.makedirs(inputDividedFolder, exist_ok=True)
        divideListJson(inputJson, c.SLICE_LENGTH, inputDividedFolder)

    # Translate every slice into the output folder.
    os.makedirs(outputDividedFolder, exist_ok=True)
    slice_files = getFileList(inputDividedFolder, _is_json)
    _translate(API, slice_files, outputDividedFolder, c)

    # Merge the translated slices, then check the result against the input.
    mergeListJson(outputJson, outputDividedFolder)
    check(inputJson, outputJson)

    # Keep the working folders when tracing is requested.
    if trace:
        return
    shutil.rmtree(inputDividedFolder)
    shutil.rmtree(outputDividedFolder)
def mergeListJson(saveJson, folder):
    """Concatenate the numbered JSON list files in ``folder`` into ``saveJson``.

    Every ``.json`` file returned by ``getFileList`` for ``folder`` is read,
    in ascending order of the integer given by ``splitPath(fpath)[1]``, and
    its list content is appended to one combined list. The combined list is
    written with ``getSaveFunc(JSON_FILE_FORMAT)``.

    A folder without any ``.json`` file produces an empty merged list.

    Args:
        saveJson: Path the merged list is saved to.
        folder: Folder containing the slice files to merge.

    Raises:
        ValueError: If ``splitPath(fpath)[1]`` of a slice file cannot be
            converted with ``int``.
    """
    fileList = getFileList(folder, lambda fpath: splitPath(fpath)[2] == '.json')
    # Sort numerically so that slice 10 comes after slice 9; the path itself
    # is the tie-breaker for files sharing the same number.
    orderedFiles = sorted(
        fileList, key=lambda fpath: (int(splitPath(fpath)[1]), fpath))
    retList = []
    for fpath in orderedFiles:
        # Close each slice as soon as it is parsed instead of leaving the
        # handle to the garbage collector.
        with open(fpath) as f:
            retList.extend(json.load(f))
    getSaveFunc(JSON_FILE_FORMAT)(retList, saveJson)
def update(API, inputJson, outputJson, needUpdateFunc, c=TranslateConfig()):
    """Re-translate selected entries of an existing translation in place.

    ``inputJson`` and ``outputJson`` are loaded as parallel lists. Each pair
    for which ``needUpdateFunc(inTerm, outTerm)`` is truthy is translated
    again through ``translate`` and the new value replaces the old one in
    the output list, which is finally saved back to ``outputJson``.

    Args:
        API: Passed through to ``translate``.
        inputJson: Path of the source list JSON.
        outputJson: Path of the translated list JSON; overwritten on return.
        needUpdateFunc: Callable ``(inTerm, outTerm) -> bool`` selecting the
            entries to translate again.
        c: Configuration forwarded to ``translate``.

    Raises:
        AssertionError: If the two lists differ in length.
    """
    with open(inputJson) as f:
        inputList = json.load(f)
    with open(outputJson) as f:
        outputList = json.load(f)
    assert len(inputList) == len(outputList)

    # [(rank, eng), ...]
    posEngList = [
        (i, inTerm)
        for i, (inTerm, outTerm) in enumerate(zip(inputList, outputList))
        if needUpdateFunc(inTerm, outTerm)
    ]

    # Skip the whole divide/translate/merge round trip when nothing was
    # selected; there is nothing to send to the translation backend.
    if posEngList:
        ufname = splitPath(outputJson)[1]
        updateInJson = TEMP_PATH + '/{}-update-input.json'.format(ufname)
        updateOutJson = TEMP_PATH + '/{}-update-output.json'.format(ufname)
        getSaveFunc(JSON_FILE_FORMAT)(
            [engTerm for _, engTerm in posEngList], updateInJson)
        translate(API, updateInJson, updateOutJson, trace=False, c=c)

        with open(updateOutJson) as f:
            updateOutList = json.load(f)
        for ui, uTerm in enumerate(updateOutList):
            # Map the position in the partial result back to the position
            # in the full output list.
            pos = posEngList[ui][0]
            print('{} -> {}'.format(outputList[pos], uTerm))  # debug
            outputList[pos] = uTerm

    getSaveFunc(JSON_FILE_FORMAT)(outputList, outputJson)
def __init__(self):
    """Run the parent initializer and set the two save paths.

    ``SAVE_MT_PKL`` is the pickle path under ``DATA_PATH``.
    ``SAVE_MT_DEMO_JSON`` is built from ``splitPath(self.SAVE_MT_PKL)[1]``
    and ``self.DEMO_TERM_NUM``. ``DEMO_TERM_NUM`` is not assigned here, so it
    must already be available on the instance (presumably from the parent
    class; verify).
    """
    super(GradedMT, self).__init__()

    mt_dir = DATA_PATH+'/umlsMT'
    self.SAVE_MT_PKL = mt_dir+'/GradedMT_2017_1.1.pkl'

    # The demo JSON reuses the pickle's name component plus the term count.
    pkl_name = splitPath(self.SAVE_MT_PKL)[1]
    demo_file = '/{}_{}.json'.format(pkl_name, self.DEMO_TERM_NUM)
    self.SAVE_MT_DEMO_JSON = mt_dir+demo_file