Beispiel #1
0
def processAllRecords(bibData):
    """Prepare and OCR every bibliographic record in ``bibData``.

    Args:
        bibData: Dictionary of bibliographic records. Only the values are
            used; each record must provide an ``"rCite"`` entry.
    """
    # The dictionary keys are never needed, so iterate the records directly.
    for record in bibData.values():
        # 1. create folders, copy files
        functions.processBibRecord(memexPath, record)

        # 2. OCR the file ("eng" is the second argument handed to
        #    identifyLanguage -- presumably the fallback language)
        language = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], language)
def processAllRecords(bibData):
    """Loop over the records in ``bibData``, processing and OCR-ing each one.

    Args:
        bibData: Dictionary whose values are bibliographic records; every
            record needs an ``"rCite"`` entry. The keys are not used.
    """
    for bibRecord in bibData.values():  # keys are unused, so skip .items()
        # 1. create folders, copy files
        functions.processBibRecord(memexPath, bibRecord)

        # 2. OCR the file
        # "eng" is passed along to identifyLanguage (presumably as the
        # default when the record names no language -- confirm in its source).
        language = identifyLanguage(bibRecord, "eng")
        ocrPublication(memexPath, bibRecord["rCite"], language)
Beispiel #3
0
def processAllRecords(bibData):
    """Run the per-record pipeline for all records of ``bibData``.

    Each record is first passed to ``functions.processBibRecord`` and then
    OCR-ed with ``ocrPublication`` using the language reported by
    ``identifyLanguage``.

    Args:
        bibData: Dictionary of bibliographic records (values); each record
            must contain ``"rCite"``.
    """
    # Only the values are needed; iterating .items() left the key unused.
    for entry in bibData.values():
        # 1. create folders, copy files
        functions.processBibRecord(memexPath, entry)

        # 2. OCR the file
        language = identifyLanguage(entry, "eng")
        ocrPublication(memexPath, entry["rCite"], language)
def processAllRecords(bibData):
    """Process every record of ``bibData`` in a random order.

    For each record, ``functions.processBibRecord`` is called first; then
    the language is looked up with ``identifyLanguage`` and the publication
    is handed to ``ocrPublication``.

    Args:
        bibData: Dictionary of bibliographic records; every record must
            contain an ``"rCite"`` entry.
    """
    shuffledKeys = list(bibData)
    random.shuffle(shuffledKeys)

    for citeKey in shuffledKeys:
        record = bibData[citeKey]
        functions.processBibRecord(pathToMemex, record)
        lang = identifyLanguage(record, "eng")
        ocrPublication(pathToMemex, record["rCite"], lang)
def processAllRecords(bibData):
    """Process and OCR all records of ``bibData`` in shuffled order.

    The keys are shuffled so that every run starts with a different record,
    which lets several instances of the script work side by side.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    order = [*bibData.keys()]
    random.shuffle(order)  # in-place shuffle: a new starting point on each run

    for bibKey in order:
        record = bibData[bibKey]  # the record stored under this key

        # 1. create folders, copy files (helper lives in functions.py)
        functions.processBibRecord(memexPath, record)

        # 2. OCR the file, using the language reported by identifyLanguage
        lang = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], lang)
Beispiel #6
0
def processAllRecords(bibData):
    """Walk through ``bibData`` in random order, processing and OCR-ing each record.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    randomOrder = list(bibData)
    random.shuffle(randomOrder)

    # Records are looked up lazily, one per iteration, in the shuffled order.
    for record in (bibData[citeKey] for citeKey in randomOrder):
        # Step 1: create folders, copy files
        functions.processBibRecord(memexPath, record)

        # Step 2: OCR the file
        recordLanguage = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], recordLanguage)
Beispiel #7
0
def processAllRecords(bibData):
    """Process all records of ``bibData``, visiting them in random order.

    The random order is meant to allow more than one process to work on
    the collection at the same time.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    citeKeys = list(bibData)
    random.shuffle(citeKeys)

    for citeKey in citeKeys:
        record = bibData[citeKey]

        functions.processBibRecord(memexPath, record)

        # The language is determined anew for every record.
        lang = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], lang)
Beispiel #8
0
def processAllRecords(bibData):
    """Process and OCR each record of ``bibData`` once, in shuffled order.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    # Copy the dictionary keys into a list and shuffle that list in place.
    shuffled = [*bibData.keys()]
    random.shuffle(shuffled)

    for recordKey in shuffled:
        currentRecord = bibData[recordKey]

        # 1. create folders, copy files
        functions.processBibRecord(memexPath, currentRecord)

        # 2. OCR the file
        lang = identifyLanguage(currentRecord, "eng")
        ocrPublication(memexPath, currentRecord["rCite"], lang)
def processAllRecords(bibData):
    """Process the records of ``bibData`` in a freshly shuffled order.

    Because the key list is shuffled on every call, each run of the script
    begins with a different record.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    keyOrder = list(bibData)
    random.shuffle(keyOrder)

    for record in (bibData[k] for k in keyOrder):
        # 1. Create folders, copy files
        functions.processBibRecord(memexPath, record)

        # 2. OCR the file
        lang = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], lang)
Beispiel #10
0
def processAllRecords(bibDataFile):
    """Load ``bibDataFile`` and process all of its records in random order.

    After the records are processed, ``functions.memexStatusUpdates`` is
    called once per file extension (.pdf, .bib, .png, .json).

    Args:
        bibDataFile: Passed to ``functions.loadBib``, which must return a
            dictionary of records that each contain an ``"rCite"`` entry.
    """
    records = functions.loadBib(bibDataFile)
    citeKeys = list(records)
    random.shuffle(citeKeys)

    for citeKey in citeKeys:
        record = records[citeKey]
        functions.processBibRecord(settings["path_to_memex"], record)
        lang = functions.identifyLanguage(record["rCite"], "eng")
        ocrPublication(record["rCite"], lang)

    # One status update per extension, in the same order as before.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
Beispiel #11
0
def processAllRecords(bibDataFile):
    """Load ``bibDataFile`` and process its records in random order.

    The shuffled list of keys is printed before processing starts.

    Args:
        bibDataFile: Passed to ``functions.loadBib``, which must return a
            dictionary of records that each contain an ``"rCite"`` entry.
    """
    # load the bib file as a dictionary
    bibData = functions.loadBib(bibDataFile)
    # save the keys of the dictionary as a list and shuffle it in place
    keys = list(bibData.keys())
    random.shuffle(keys)
    # A bare `print` is a no-op in Python 3; call it to emit the blank line.
    print()
    print(keys)
    # process each record by its key, in the shuffled order
    for key in keys:
        bibRecord = bibData[key]
        # 1. create the folder with pdf and bib files, if not already there
        functions.processBibRecord(settings["path_to_memex"], bibRecord)
        language = functions.identifyLanguage(bibRecord, "eng")
        # 2. OCR: save the text as json files and create .png images per page
        ocrPublication(bibRecord["rCite"], language)
Beispiel #12
0
def processAllRecords(bibData):
    """Process and OCR every record of ``bibData`` in random order.

    Args:
        bibData: Dictionary of bibliographic records; each record must
            contain an ``"rCite"`` entry.
    """
    # Collect the dictionary keys and shuffle the whole list in place.
    allKeys = list(bibData)
    random.shuffle(allKeys)

    for currentKey in allKeys:
        record = bibData[currentKey]

        # 1. create folders, copy files
        functions.processBibRecord(memexPath, record)

        # 2. OCR the file; "eng" is the fallback language given to
        #    identifyLanguage, whose return value is passed on to the OCR step
        detected = identifyLanguage(record, "eng")
        ocrPublication(memexPath, record["rCite"], detected)
Beispiel #13
0
def processAllRecords(bibDataFile):
    """Load ``bibDataFile`` and process all of its records in random order.

    Each record is passed to ``functions.processBibRecord`` and then OCR-ed
    with the page limit taken from ``settings["page_limit"]``. Afterwards
    ``functions.memexStatusUpdates`` is called once per file extension.

    Args:
        bibDataFile: Passed to ``functions.loadBib``, which must return a
            dictionary of records that each contain an ``"rCite"`` entry.
    """
    records = functions.loadBib(bibDataFile)
    citeKeys = list(records)
    random.shuffle(citeKeys)  # randomizes the order in which records are OCR-ed

    for citeKey in citeKeys:
        record = records[citeKey]
        functions.processBibRecord(settings["path_to_memex"], record)
        lang = functions.identifyLanguage(record["rCite"], "eng")
        # The page limit is read from the settings and converted to int.
        pageLimit = int(settings["page_limit"])
        ocrPublication(record["rCite"], lang, pageLimit)

    # NOTE(review): what memexStatusUpdates does per extension is not visible
    # here -- presumably it reports on files of that type; confirm.
    for extension in (".pdf", ".bib", ".png", ".json"):
        functions.memexStatusUpdates(settings["path_to_memex"], extension)
Beispiel #14
0
def processAllRecords(bibData):
    """Call ``functions.processBibRecord`` for every record in ``bibData``."""
    for k, v in bibData.items():
        # Only the record (the value) is used; the key is ignored.
        functions.processBibRecord(memexPath, v)