def processAllRecordsSTR(pathToMemex):
    """Pass the publications found under ``pathToMemex`` to ``ocrPublication``.

    The keys of ``functions.dicOfRelevantFiles(pathToMemex, ".bib")`` are used
    as citation keys and visited in random order; each matching value is
    loaded with ``functions.loadBib``. A publication is skipped only when its
    bib data has a ``"pagetotal"`` value larger than
    ``settings["page_limit"]``; publications without that field are always
    processed.

    Args:
        pathToMemex: folder handed to ``functions.dicOfRelevantFiles``.
    """
    bibFiles = functions.dicOfRelevantFiles(pathToMemex, ".bib")
    shuffledKeys = list(bibFiles.keys())
    # Randomised order; presumably so repeated or parallel runs do not all
    # start on the same publication -- TODO confirm.
    random.shuffle(shuffledKeys)

    for citeKey in shuffledKeys:
        print(citeKey)
        bibData = functions.loadBib(bibFiles[citeKey])

        # NOTE(review): "pagetotal" is looked up on the top-level bibData,
        # while identifyLanguage below receives bibData[citeKey]. Confirm
        # which level of the loaded structure actually holds the field.
        withinLimit = (
            "pagetotal" not in bibData
            or int(bibData["pagetotal"]) <= int(settings["page_limit"])
        )
        if withinLimit:
            language = functions.identifyLanguage(bibData[citeKey], "eng")
            ocrPublication(citeKey, language, settings["page_limit"])

    # NOTE(review): the status updates read settings["path_to_memex"], not the
    # pathToMemex argument -- confirm both always point at the same folder.
    # They are placed after the loop; verify against the intended nesting.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
def processAllRecords(bibDataFile):
    """Process every record of ``bibDataFile`` in random order.

    Each record is handed to ``functions.processBibRecord`` and then to
    ``ocrPublication`` together with the language reported by
    ``functions.identifyLanguage`` (``"eng"`` is passed as its second
    argument). Afterwards ``functions.memexStatusUpdates`` is called once per
    file extension.

    Args:
        bibDataFile: value passed to ``functions.loadBib``.
    """
    # NOTE(review): this file contains later definitions with the same name;
    # at import time the last one replaces this one.
    records = functions.loadBib(bibDataFile)
    shuffledKeys = list(records.keys())
    random.shuffle(shuffledKeys)

    for recordKey in shuffledKeys:
        record = records[recordKey]
        functions.processBibRecord(settings["path_to_memex"], record)
        # The language lookup is given the citation key string here, not the
        # whole record.
        language = functions.identifyLanguage(record["rCite"], "eng")
        ocrPublication(record["rCite"], language)

    # Status updates are placed after the loop; verify against the intended
    # nesting.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
def processAllRecords(bibDataFile):
    """Process every record of ``bibDataFile`` in random order.

    For each record, ``functions.processBibRecord`` is called first, then the
    language is looked up with ``functions.identifyLanguage`` and the
    publication is handed to ``ocrPublication``.

    Args:
        bibDataFile: value passed to ``functions.loadBib``.
    """
    # NOTE(review): this file contains a later definition with the same name;
    # at import time the last one replaces this one.

    # Load the bib file as a dictionary keyed by citation key.
    bibData = functions.loadBib(bibDataFile)

    # Shuffle the citation keys so records are visited in random order.
    keys = list(bibData.keys())
    random.shuffle(keys)
    # Debug output of the processing order. The bare `print` statement that
    # used to precede this line was a no-op in Python 3 and has been removed.
    print(str(keys))

    for key in keys:
        bibRecord = bibData[key]

        # Create the folder with the pdf and bib files, if not already there.
        functions.processBibRecord(settings["path_to_memex"], bibRecord)

        # The whole record (not just its citation key) is passed here.
        language = functions.identifyLanguage(bibRecord, "eng")

        # Save the OCR-ed text as json files and create a .png image for
        # each page.
        ocrPublication(bibRecord["rCite"], language)
def processAllRecords(bibDataFile):
    """Process every record of ``bibDataFile`` in random order.

    Each record is handed to ``functions.processBibRecord`` and then to
    ``ocrPublication`` together with its detected language and the page
    limit from ``settings["page_limit"]`` (converted to ``int``). Afterwards
    ``functions.memexStatusUpdates`` is called once per file extension.

    Args:
        bibDataFile: value passed to ``functions.loadBib``.
    """
    # NOTE(review): earlier definitions of processAllRecords in this file are
    # replaced by this one at import time.
    records = functions.loadBib(bibDataFile)

    # Visit the citation keys in random order.
    citeKeys = list(records.keys())
    random.shuffle(citeKeys)

    for citeKey in citeKeys:
        record = records[citeKey]
        functions.processBibRecord(settings["path_to_memex"], record)

        # "eng" is the second argument of identifyLanguage; presumably the
        # fallback language -- TODO confirm against functions.identifyLanguage.
        language = functions.identifyLanguage(record["rCite"], "eng")
        ocrPublication(
            record["rCite"],
            language,
            int(settings["page_limit"]),
        )

    # One status update per file type. Placed after the loop; verify against
    # the intended nesting.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)