Beispiel #1
0
def processAllRecords(bibDataFile):
    """Process and OCR every record of a bibliography file, in random order.

    ``bibDataFile`` is handed to ``functions.loadBib``; the result is used as
    a mapping from key to record, and each record must provide an ``"rCite"``
    entry. Once all records are handled, ``functions.memexStatusUpdates`` is
    called for the ``.pdf``, ``.bib``, ``.png`` and ``.json`` extensions.
    """
    records = functions.loadBib(bibDataFile)
    shuffledKeys = list(records)
    random.shuffle(shuffledKeys)  # visit the records in random order

    for recordKey in shuffledKeys:
        record = records[recordKey]
        functions.processBibRecord(settings["path_to_memex"], record)
        # "eng" is the second argument of identifyLanguage -- presumably the
        # fallback language; confirm in the functions module.
        recordLanguage = functions.identifyLanguage(record["rCite"], "eng")
        ocrPublication(record["rCite"], recordLanguage)

    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
Beispiel #2
0
def processAllRecordsSTR(pathToMemex):
    """OCR the publications of a memex that do not exceed the page limit.

    The ``.bib`` files under ``pathToMemex`` are collected with
    ``functions.dicOfRelevantFiles`` (used as a mapping from cite key to file
    path) and visited in random order. A publication is skipped only when its
    record has a ``"pagetotal"`` field whose integer value is greater than
    ``settings["page_limit"]``; records without that field are always OCRed.
    Afterwards ``functions.memexStatusUpdates`` is called for four extensions.

    Raises:
        ValueError: if a record's ``"pagetotal"`` or the configured page
            limit cannot be converted with ``int``.
    """
    files = functions.dicOfRelevantFiles(pathToMemex, ".bib")
    citeKeys = list(files.keys())
    random.shuffle(citeKeys)  # visit the publications in random order

    for citeKey in citeKeys:
        print(citeKey)
        # loadBib's result is indexed by cite key, so "pagetotal" has to be
        # looked up in the record itself; testing the outer dict never
        # matched, which silently disabled the page-limit check.
        bibRecord = functions.loadBib(files[citeKey])[citeKey]
        if ("pagetotal" in bibRecord
                and int(bibRecord["pagetotal"]) > int(settings["page_limit"])):
            continue  # longer than the configured limit: do not OCR

        language = functions.identifyLanguage(bibRecord, "eng")
        ocrPublication(citeKey, language, settings["page_limit"])

    # NOTE(review): the status updates read the path from settings rather
    # than from pathToMemex -- confirm both always name the same directory.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
Beispiel #3
0
def processAllRecords(bibDataFile):
    """Process and OCR every record of a bibliography file, in random order.

    ``bibDataFile`` is passed to ``functions.loadBib``; the result is used as
    a mapping from key to record, and each record must provide an ``"rCite"``
    entry. ``ocrPublication`` receives ``settings["page_limit"]`` converted to
    ``int`` as its third argument.
    """
    records = functions.loadBib(bibDataFile)
    shuffledKeys = list(records)
    random.shuffle(shuffledKeys)  # visit the records in random order

    for recordKey in shuffledKeys:
        record = records[recordKey]  # look up the record stored under this key
        functions.processBibRecord(settings["path_to_memex"], record)
        # "eng" is the second argument of identifyLanguage -- presumably the
        # fallback language; confirm in the functions module.
        recordLanguage = functions.identifyLanguage(record["rCite"], "eng")
        ocrPublication(
            record["rCite"], recordLanguage, int(settings["page_limit"]))

    # One status-update call per file extension, in the original order.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
                prevPage = "" #there is no previous page
            elif k == "0001": #if the page is the first page of the record
                nextPage = "0002.html" #the next page is the second page of the record
                prevPage = "DETAILS.html" #the previous page is the Details page
            elif o == len(orderedPages)-1: #if the page is the last page of the record
                nextPage = "" #there is no next page
                prevPage = orderedPages[o-1] + ".html" #the previous page is the penultimate page of the record
            else: #for all other pages
                nextPage = orderedPages[o+1] + ".html" #the next page is the page behind in the record
                prevPage = orderedPages[o-1] + ".html" #the previous page is the page before in the record

            pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage) #replace the wildcard with a link to the page assigned in the lines before
            pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage) #replace the Previouspagehtml item with a link to the page assigned in the lines before

            pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k) #create a filepath to each page in the pages-folder of each publication
            with open(pagePath, "w", encoding="utf8") as f9:
                f9.write(pageTemp) #create and save each page in that pages folder

###########################################################
# FUNCTIONS TESTING #######################################
###########################################################

# Runs at import/script time with the memex path and the ".html" extension.
# Presumably reports the state of the .html files -- confirm in
# functions.memexStatusUpdates.
functions.memexStatusUpdates(settings["path_to_memex"], ".html")
def processAllRecords(pathToMemex):
    """Generate the publication interface for every record that has a JSON file.

    The ``.bib`` files under ``pathToMemex`` are collected with
    ``functions.dicOfRelevantFiles`` (used as a mapping from cite key to file
    path). ``generatePublicationInterface`` is called only for those whose
    sibling ``.json`` file exists.
    """
    files = functions.dicOfRelevantFiles(pathToMemex, ".bib")
    for citeKey, pathToBibFile in files.items():
        # Swap only the file extension: str.replace(".bib", ".json") would
        # also rewrite a ".bib" that happens to occur in a directory name.
        pathToJsonFile = os.path.splitext(pathToBibFile)[0] + ".json"
        if os.path.exists(pathToJsonFile):
            generatePublicationInterface(citeKey, pathToBibFile)

# Build the interface pages for every publication in the configured memex.
processAllRecords(settings["path_to_memex"])
# Run the next script in this module's namespace. The context manager closes
# the file (the bare open(...).read() never did), and the explicit encoding
# matches the other open() calls in this file instead of the locale default.
with open("6_Interface_IndexPage.py", encoding="utf8") as nextStepFile:
    exec(nextStepFile.read())
Beispiel #5
0
                prevPage = ""
            elif k == "0001":
                nextPage = "0002.html"
                prevPage = "DETAILS.html"
            elif o == len(orderedPages)-1:
                nextPage = ""
                prevPage = orderedPages[o-1] + ".html"
            else:
                nextPage = orderedPages[o+1] + ".html"
                prevPage = orderedPages[o-1] + ".html"

            pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
            pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

            pagePath = os.path.join(pathToBibFile.replace(citeKey+".bib", ""), "pages", "%s.html" % k)
            with open(pagePath, "w", encoding="utf8") as f9:
                f9.write(pageTemp)

###########################################################
# FUNCTIONS TESTING #######################################
###########################################################

# Runs at import/script time with the memex path and the ".html" extension.
# Presumably reports the state of the .html files -- confirm in
# functions.memexStatusUpdates.
functions.memexStatusUpdates(settings["path_to_memex"], ".html")
def processAllRecords(pathToMemex):
    """Generate the publication interface for every record that has a JSON file.

    The ``.bib`` files under ``pathToMemex`` are collected with
    ``functions.dicOfRelevantFiles`` (used as a mapping from cite key to file
    path). ``generatePublicationInterface`` is called only for those whose
    sibling ``.json`` file exists.
    """
    files = functions.dicOfRelevantFiles(pathToMemex, ".bib")
    for citeKey, pathToBibFile in files.items():
        # Swap only the file extension: str.replace(".bib", ".json") would
        # also rewrite a ".bib" that happens to occur in a directory name.
        pathToJsonFile = os.path.splitext(pathToBibFile)[0] + ".json"
        if os.path.exists(pathToJsonFile):
            generatePublicationInterface(citeKey, pathToBibFile)

# Build the interface pages for every publication in the configured memex.
processAllRecords(settings["path_to_memex"])
# Run the next script in this module's namespace. The context manager closes
# the file (the bare open(...).read() never did), and the explicit encoding
# matches the other open() calls in this file instead of the locale default.
with open("9_Interface_IndexPage.py", encoding="utf8") as nextStepFile:
    exec(nextStepFile.read())
Beispiel #6
0
            else:
                nextPage = orderedPages[o + 1] + ".html"
                prevPage = orderedPages[o - 1] + ".html"

            pageTemp = pageTemp.replace("@NEXTPAGEHTML@", nextPage)
            pageTemp = pageTemp.replace("@PREVIOUSPAGEHTML@", prevPage)

            pagePath = os.path.join(
                pathToBibFile.replace(citeKey + ".bib", ""), "pages",
                "%s.html" % k)
            with open(pagePath, "w", encoding="utf8") as f9:
                f9.write(pageTemp)


###########################################################
# PROCESS ALL RECORDS #####################################
###########################################################

# Runs at import/script time with memexPath and the ".html" extension.
# Presumably reports the state of the .html files -- confirm in
# functions.memexStatusUpdates.
functions.memexStatusUpdates(memexPath, ".html")


def processAllRecords():
    """Generate the publication interface for every "bib" entry of the memex.

    Entries come from ``functions.dicOfRelevantFiles(memexPath, "bib")``,
    which is used as a mapping from key to value. Each key is passed on
    without its last character, together with the unchanged value.
    """
    bibEntries = functions.dicOfRelevantFiles(memexPath, "bib")
    for rawKey, entryValue in bibEntries.items():
        # Drop the final character of the key. Presumably a trailing "." left
        # over because the extension is given without a dot -- confirm
        # against dicOfRelevantFiles.
        trimmedKey = rawKey[:-1]
        generatePublicationInterface(trimmedKey, entryValue)


# Build the interface pages for every publication in the memex.
processAllRecords()

# Run the next script in this module's namespace. The context manager closes
# the file (the bare open(...).read() never did), and the explicit encoding
# matches the other open() calls in this file instead of the locale default.
with open("7_IndexPage.py", encoding="utf8") as nextStepFile:
    exec(nextStepFile.read())