def prepare():
    """Load all contributors from the HAMLET MySQL database.

    Returns a dict mapping a generated contributor id (prefix plus a
    16-character hexadecimal hash of "{lastname} {firstname}") to a
    contributor record, or None when the database access fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM contributors")
            results = cursor.fetchall()
            # The cursor must be closed before (not after) the connection;
            # the original closed them in the wrong order.
            cursor.close()
        finally:
            # Release the connection even when the query fails.
            conn.close()

        # Contains all the contributors from hyperhamlet.
        # Key of the contributor object is {lastname firstname}.
        all_contributors = {}
        for row in results:
            contributor = {
                "hasFirstName": row["firstname"].strip(),
                "hasLastName": row["lastname"].strip(),
                "hasGender": "male"  # default; the '*' marker flips it below
            }
            # Guard against NULL emails (re.search on None raises TypeError).
            if row["email"]:
                # Email is assigned to description after '*' is removed.
                contributor["hasDescription"] = row["email"].replace('*', '')
                # A '*' in the email column marks a female contributor.
                if re.search(r"(\*)(.*)", row["email"]):
                    contributor["hasGender"] = "female"
            # Create a key which has the format "{lastname} {firstname}".
            unique_key = "{} {}".format(contributor["hasLastName"],
                                        contributor["hasFirstName"])
            # ID contains a prefix and a hash which is a hexadecimal with
            # 16 characters.
            contributor_id = id.generate(unique_key)
            # Keep the primary key of the SQL row for traceability.
            contributor["sql"] = row["id"]
            all_contributors[contributor_id] = contributor
        return all_contributors
    except Exception as err:
        # NOTE(review): errors are only printed and None is returned
        # implicitly, matching the original behavior; callers must
        # handle a None result.
        print(err)
def prepare():
    """Load all secondary books ("SEC" rows) from 'linecategories'.

    Returns a dict mapping a generated book id (prefix plus a
    16-character hexadecimal hash of the internal id) to a book record,
    or None when the database access fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM linecategories")
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Contains all the secondary books from hyperhamlet.
        # Key of the book object is {internalID}.
        all_sec_books = {}
        for row in results:
            # Row names look like "@123456 <rest>".
            books = re.search(r"(@\d{6})\s(.*)", row["name"])
            if books:
                book = {}
                # Only rows marked "SEC - <title>" are secondary books.
                sec = re.search(r"SEC\s\-\s(.*)", books.group(2))
                if sec:
                    book["hasBookInternalId"] = books.group(1)
                    # Extracts the title and a possible leading article.
                    book = pref.get_prefix_book(sec.group(1), book)
                    # Key is the internal id alone.
                    unique_key = book["hasBookInternalId"]
                    # ID contains prefix and a 16-char hexadecimal hash.
                    book_id = id.generate(unique_key)
                    # NOTE(review): the SQL row id is deliberately not
                    # stored here (it was commented out in the original).
                    all_sec_books[book_id] = book
        return all_sec_books
    except Exception as err:
        # NOTE(review): errors are only printed; callers receive None.
        print(err)
def update_sec_book(sec_b_id, auth_names):
    """Link every author named in auth_names to the secondary book.

    Authors that are not registered yet are created first; the book's
    "isWrittenBy" list is kept free of duplicates.
    """
    book = books[sec_b_id]
    for name in auth_names:
        # Derive the author's id from the name.
        author_id = id.generate(name)
        # Register the author on first sight.
        if author_id not in authors:
            create_author(author_id)
        # De-duplicate via a set, then store as a list again.
        written_by = set(book["isWrittenBy"])
        written_by.add(author_id)
        book["isWrittenBy"] = list(written_by)
def prepare():
    """Load all venues from the 'generals' table.

    Only direct children of the configured root node are considered.
    Returns {venue_id: venue_dict} or None when the database access
    fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM generals")
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Key of the venue object is {venueInternalId venuePlace}.
        all_venues = {}
        for row in results:
            # Only rows directly under the root node are venues.
            if row["parent"] == root_node_id:
                # Row names look like "#123456 <title>_<place>".
                venues = re.search(r"^(#\d{6})\s(.*)_(.*)", row["name"])
                if venues:
                    venue = {
                        "venueInternalId": venues.group(1),
                        "hasPlaceVenue": venues.group(3),
                    }
                    # Create a key with the format
                    # "{venueInternalId} {venuePlace}".
                    unique_key = "{} {}".format(venue["venueInternalId"],
                                                venue["hasPlaceVenue"])
                    # ID contains prefix and a 16-char hexadecimal hash.
                    venue_id = id.generate(unique_key)
                    all_venues[venue_id] = venue
        return all_venues
    except Exception as err:
        # NOTE(review): errors are only printed; callers receive None.
        print(err)
def prepare():
    """Load all companies from the 'generals' table.

    Only direct children of the configured root node are considered.
    Returns {company_id: company_dict} or None when the database access
    fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM generals")
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Key of the company object is {companyInternalId}.
        all_companies = {}
        for row in results:
            # Only rows directly under the root node are companies.
            if row["parent"] == root_node_id:
                # Row names look like "#123456 <title>".
                companies = re.search(r"^(#\d{6})\s(.*)", row["name"])
                if companies:
                    company = {
                        "hasCompanyInternalId": companies.group(1),
                        "hasCompanyTitle": companies.group(2),
                    }
                    # ID contains prefix and a 16-char hexadecimal hash;
                    # the key is the internal id alone.
                    company_id = id.generate(company["hasCompanyInternalId"])
                    all_companies[company_id] = company
        return all_companies
    except Exception as err:
        # NOTE(review): errors are only printed; callers receive None.
        print(err)
def prepare():
    """Load all lexias from the 'modifications' table.

    Returns {lexia_id: lexia_dict} or None when the database access
    fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute('SELECT * FROM modifications')
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Contains all the lexias from hyperhamlet.
        all_lexia = {}
        for row in results:
            # Parses the row name into a lexia record (may return a falsy
            # value for rows that are not lexias).
            lexia = ed.info(row["name"], None, None)
            if lexia:
                # Key format is "{lexiaTitle} {lexiaInternalId}" (the
                # original comment claiming {firstName lastName} was wrong).
                unique_key = "{} {}".format(lexia["hasLexiaTitle"],
                                            lexia["hasLexiaInternalId"])
                # ID contains prefix and a 16-char hexadecimal hash.
                lexia_id = id.generate(unique_key)
                all_lexia[lexia_id] = lexia
        return all_lexia
    except Exception as err:
        # NOTE(review): errors are only printed; callers receive None.
        print(err)
def create_sec_passage(sec_pa_id, pag, sec_bo):
    """Register a new secondary passage under the id sec_pa_id.

    The passage inherits its displayed title (and optional title
    prefix) from the secondary book sec_bo; the page is attached when
    given, and Regula Hohl Trillini is set as the default contributor
    if she is a known contributor.
    """
    # Fixed defaults shared by every secondary passage.
    new_passage = {
        "hasText": "-",
        "hasDisplayedTitle": sec_bo["hasDisplayedTitle"],
        "hasResearchField": "Reading",
        "hasFunctionVoice": ["Not defined"],
        "hasMarking": ["Unmarked"],
        "hasStatus": "public",
        "occursIn": [],
    }
    if pag:
        new_passage["hasPage"] = pag
    if "hasPrefixDisplayedTitle" in sec_bo:
        new_passage["hasPrefixDisplayedTitle"] = sec_bo["hasPrefixDisplayedTitle"]
    # Default contributor: Regula Hohl Trillini (only when registered).
    default_contributor = id.generate("Hohl-Trillini Regula")
    if default_contributor in allContributors:
        new_passage["wasContributedBy"] = default_contributor
    passages[sec_pa_id] = new_passage
def prepare_csv():
    """Read the secondary-literature CSV and build all secondary books.

    Returns {sec_book_id: sec_book_dict}. Any malformed row prints a
    FAIL message and terminates the program via SystemExit.
    """
    # Remembers the book reference of the row being processed so the
    # error report below can mention it safely.
    last_ref = ""
    try:
        with open("01_prepare_scripts/csv/sec.csv") as f:
            csv_reader = csv.reader(f, delimiter=';')
            # line number in csv file
            line = 0
            # Key of the secondary book object is {internalID}.
            all_sec_books = {}
            for row in csv_reader:
                # Skip first row with column titles.
                if line != 0:
                    last_ref = row[13]
                    sec_book = {}
                    # Reference format: "@123456 SEC - <title>".
                    sec_books = re.search(r"(@\d{6})\sSEC\s\-\s(.*)", row[13])
                    if not sec_books:
                        print("FAIL prep_sec_books.py", line, row[13])
                        raise SystemExit(0)
                    sec_book["hasBookInternalId"] = sec_books.group(1)
                    sec_book["hasLanguage"] = row[9]
                    # Gets book title and its prefix.
                    sec_book = pref.get_prefix_book(sec_books.group(2),
                                                    sec_book)
                    # Gets displayed title and its prefix
                    # (-> the same for all the sec passages).
                    sec_book = pref.get_prefix_passage(row[3], sec_book)
                    s = ed.info(row[4])
                    sec_book["pubInfo"] = s["pubInfo"]
                    if row[5] and row[6]:
                        sec_book["hasCreationDate"] = \
                            "GREGORIAN:{}:{}".format(row[5], row[6])
                        sec_book["hasPublicationDate"] = \
                            "GREGORIAN:{}:{}".format(row[5], row[6])
                    if row[18]:
                        sec_book["hasGenre"] = [row[18]]
                    # Multiple subjects.
                    if row[19]:
                        sec_book["hasSubject"] = row[19].split(" / ")
                    # ---------- AUTHOR
                    # Multiple names of authors; renamed from 'authors' to
                    # avoid shadowing the module-level dict of that name.
                    author_names = []
                    for name in row[2].split(" / "):
                        # Empty author names are invalid.
                        if not name:
                            print("FAIL Author in SEC", line)
                            raise SystemExit(0)
                        author_names.append(name.strip())
                    sec_book["authors"] = author_names
                    # ID contains prefix and a 16-char hexadecimal hash.
                    sec_book_id = id.generate(sec_book["hasBookInternalId"])
                    all_sec_books[sec_book_id] = sec_book
                line += 1
            return all_sec_books
    except Exception as err:
        # BUG FIX: the handler used to reference row[13] directly, which
        # raised NameError when the failure happened before the first
        # row was read (e.g. a missing file).
        print("FAIL: prep_sec_books.py", err, last_ref)
        raise SystemExit(0)
def start():
    """Drive the CSV import.

    Builds and cross-links authors, books, passages, contributors,
    secondary books, lexias, companies and venues from the entry CSV
    files, then handles the non-author / non-venue / human-company
    supplement files, saves everything to JSON and runs the cardinality
    checks. Any validation failure prints a FAIL message and terminates
    the program via SystemExit.
    """
    # Reads the csv files
    for csv_file in csv_files:
        try:
            with open(csv_file) as f:
                csv_reader = csv.reader(f, delimiter=';')
                # line number in csv file
                # NOTE(review): if open() fails on the very first file,
                # 'line' is unbound in the except handler below — confirm.
                line = 0
                for row in csv_reader:
                    # Skip first row with column title
                    if line != 0:
                        # ---------- AUTHOR
                        # Multiple names of authors, separated by " / "
                        names = row[2].split(" / ")
                        # Iterates through the names per entry
                        for name in names:
                            # Generates author id
                            author_id = id.generate(name)
                            # Checks if author_id is valid (must be known
                            # from the prepared SQL data)
                            if author_id not in allAuthors:
                                print("FAIL Author", author_id, line,
                                      csv_file)
                                raise SystemExit(0)
                            # Creates author if it does not exist
                            if author_id not in authors:
                                create_author(author_id)
                        # ---------- BOOK
                        # Extracts internal id and title
                        key_id, key_title = prep_books.get_key_for_id(row[13])
                        # Generates book id
                        if key_id and key_title:
                            book_id = id.generate("{} {}".format(
                                key_id, key_title))
                        else:
                            print("FAIL Book Title & ID", row[13], line,
                                  csv_file)
                            raise SystemExit(0)
                        # Checks if book_id is valid
                        if book_id not in allBooks:
                            print("FAIL Book", book_id, row[13], line,
                                  csv_file)
                            raise SystemExit(0)
                        publication = ed.info(row[4])
                        publication_original = ed.info(row[26])
                        # Creates the book if it is new
                        if book_id not in books:
                            create_book(book_id, row, publication,
                                        publication_original,
                                        allBooks[book_id])
                        # Comments
                        comments = comment.info(row[11])
                        # Updates the author references, genre and comment
                        update_book(book_id, names, publication, None, None,
                                    row[18], None, None, comments)
                        # ------------- PASSAGE
                        # ##### Variant 1 (old) #####
                        # Generates passage id
                        # passage_id = id.generate(row[10])
                        # ##### Variant 2 (new) #####
                        # Generates passage id with random generator
                        unique_key = random.randint(1000000, 9999999)
                        passage_id = id.generate(str(unique_key))
                        # Extracts displayed title and prefix
                        pass_title = pref.get_prefix_passage(row[3], {})
                        # Creates the passage and updates the edition
                        # reference
                        if passage_id not in passages:
                            create_passage(passage_id, pass_title, row[10],
                                           row[25], publication,
                                           publication_original)
                        # Updates the book reference and passage comment
                        update_passage(passage_id, book_id, None, None, None,
                                       None, None, None, comments)
                        # Multiple subjects
                        subjects = row[19].split(" / ")
                        for subject in subjects:
                            update_book(book_id, None, None, None, None,
                                        None, subject, None, None)
                        # Function voice
                        f_voices = row[21].split(" / ")
                        for f_voice in f_voices:
                            # Narratives (row[20]) only apply to the
                            # "BODY OF TEXT" function voice.
                            if f_voice == "BODY OF TEXT" and row[20]:
                                narratives = row[20].split(" / ")
                                for narrative in narratives:
                                    update_passage(passage_id, None, None,
                                                   None, None, None,
                                                   narrative, None, None)
                            elif f_voice == "BODY OF TEXT" and not row[20]:
                                update_passage(passage_id, None, None, None,
                                               None, None, "Not defined",
                                               None, None)
                            elif not f_voice and row[20]:
                                print("FAIL - No function but narrative",
                                      row[20], csv_file, line)
                                raise SystemExit(0)
                            else:
                                update_passage(passage_id, None, None, None,
                                               None, None, f_voice, None,
                                               None)
                        # Evaluates markings
                        # Checks if column is empty and sets unmarked
                        work_value = "Work unmarked" if not row[16] else row[16]
                        author_value = "Author unmarked" if not row[
                            17] else row[17]
                        # Fully unmarked (optionally with only a local
                        # reference) -> single "Unmarked" marking
                        if (work_value == "Work unmarked"
                                and author_value == "Author unmarked"
                                and not row[22]) or (
                                    work_value == "Work unmarked"
                                    and author_value == "Author unmarked"
                                    and row[22] == "Local reference"):
                            update_passage(passage_id, None, None, None,
                                           None, None, None, "Unmarked",
                                           None)
                        else:
                            if work_value != "Work unmarked":
                                # Checks if work marking has multiple values
                                if " / " in work_value:
                                    work_marked = row[16].split(" / ")
                                    for w_m in work_marked:
                                        update_passage(passage_id, None,
                                                       None, None, None,
                                                       None, None, w_m,
                                                       None)
                                else:
                                    update_passage(passage_id, None, None,
                                                   None, None, None, None,
                                                   work_value, None)
                            if author_value != "Author unmarked":
                                update_passage(passage_id, None, None, None,
                                               None, None, None,
                                               author_value, None)
                            if row[22]:
                                for marking in pas.get_marking(row[22]):
                                    update_passage(passage_id, None, None,
                                                   None, None, None, None,
                                                   marking, None)
                        # ------------- CONTRIBUTOR
                        # Generates contributor id
                        contributor_id = id.generate(row[27].strip())
                        # Checks if contributor_id is valid
                        if contributor_id not in allContributors:
                            print("FAIL Contributor", contributor_id,
                                  csv_file, line)
                            raise SystemExit(0)
                        # Creates the contributor and updates the passage
                        # reference
                        if contributor_id not in contributors:
                            create_contributor(contributor_id)
                        # Updates the contributor reference
                        update_passage(passage_id, None, contributor_id,
                                       None, None, None, None, None, None)
                        # -------------- SECONDARY BOOK
                        sec_books = sec.info(row[24], line, csv_file)
                        # Sets the research field "Reading" when there are
                        # no sec books
                        if len(sec_books) == 0:
                            update_passage(passage_id, None, None, None,
                                           None, "Reading", None, None,
                                           None)
                        for sec_book in sec_books:
                            sec_book_id = id.generate(sec_book["id"])
                            # Checks if sec book id is valid
                            if sec_book_id not in allSecBooks:
                                print("FAIL Secondary Book", sec_book["id"],
                                      csv_file, line, row[24])
                                raise SystemExit(0)
                            s_book = allSecBooks[sec_book_id]
                            if sec_book_id not in books:
                                create_sec_book(sec_book_id, s_book)
                            # Sets the research field depending on whether
                            # the genre marks a fulltext database
                            if "Fulltext database" in s_book["hasGenre"]:
                                update_passage(passage_id, None, None, None,
                                               None, "Electronic Search",
                                               None, None, None)
                            if not "Fulltext database" in s_book["hasGenre"]:
                                update_passage(passage_id, None, None, None,
                                               None, "Previous Research",
                                               None, None, None)
                            # Updates the authors reference
                            update_sec_book(sec_book_id, s_book["authors"])
                            # Creates sec passage id with random generator
                            unique_key = random.randint(100000, 999999)
                            sec_passage_id = id.generate(str(unique_key))
                            create_sec_passage(sec_passage_id,
                                               sec_book["page"], s_book)
                            update_sec_passage(sec_passage_id, sec_book_id)
                            update_passage(passage_id, None, None,
                                           sec_passage_id, None, None, None,
                                           None, None)
                        # --------------- COMPANY & VENUES
                        if row[12]:
                            comp_ven_names = row[12].split(" / ")
                            for comp_ven_name in comp_ven_names:
                                comp_ven_data, type_1 = comp_ven.info(
                                    comp_ven_name, line, csv_file)
                                if type_1 == "venue":
                                    unique_key = "{} {}".format(
                                        comp_ven_data["hasVenueInternalId"],
                                        comp_ven_data["hasPlaceVenue"])
                                    venue_id = id.generate(unique_key)
                                    # NOTE(review): unlike the other checks
                                    # this one does not abort on failure —
                                    # confirm whether that is intended
                                    if venue_id not in allVenues:
                                        print("FAIL Venue", venue_id, line,
                                              csv_file)
                                    if venue_id not in venues:
                                        create_venue(venue_id, comp_ven_data)
                                    # Updates the venue reference
                                    update_book(book_id, None, None,
                                                venue_id, None, None, None,
                                                None, None)
                                elif type_1 == "company":
                                    company_id = id.generate(
                                        comp_ven_data["hasCompanyInternalId"])
                                    # NOTE(review): no SystemExit here either
                                    if company_id not in allCompanies:
                                        print("FAIL Company", company_id,
                                              line, csv_file)
                                    if company_id not in companies:
                                        create_company(company_id,
                                                       comp_ven_data)
                                    # Updates the company reference
                                    update_book(book_id, None, None, None,
                                                company_id, None, None,
                                                None, None)
                        # --------------- LEXIA
                        # Multiple names of lexias
                        lex_names = row[15].split(" / ")
                        for lex_name in lex_names:
                            le = lex.info(lex_name, line, csv_file)
                            # Creates a key which has the following format
                            # {title internalID}. {internalID title} would
                            # cause an error later because it already exists
                            unique_key = "{} {}".format(
                                le["hasLexiaTitle"],
                                le["hasLexiaInternalId"])
                            lexia_id = id.generate(unique_key)
                            if lexia_id not in allLexias:
                                print("FAIL Lexia", lexia_id, le, line,
                                      csv_file)
                                raise SystemExit(0)
                            if lexia_id not in lexias:
                                create_lexia(lexia_id, le)
                            # Updates the lexia reference
                            update_passage(passage_id, None, None, None,
                                           lexia_id, None, None, None, None)
                            # A lexia may also name an author ...
                            isLexiaAuthor = id.generate(le["hasLexiaTitle"])
                            if isLexiaAuthor in authors:
                                # Internal ID of author must be overwritten
                                # because it is generated from
                                # incrementation and in this case the ID
                                # comes from the user
                                update_author(isLexiaAuthor,
                                              le["hasLexiaInternalId"],
                                              lexia_id)
                            # ... or a book / venue ...
                            key = "{} {}".format(le["hasLexiaInternalId"],
                                                 le["hasLexiaTitle"])
                            isLexiaBookVenue = id.generate(key)
                            if isLexiaBookVenue in books:
                                update_book(isLexiaBookVenue, None, None,
                                            None, None, None, None,
                                            lexia_id, None)
                            if isLexiaBookVenue in venues:
                                update_venue(isLexiaBookVenue, lexia_id)
                            # ... or a company.
                            isLexiaCompany = id.generate(
                                le["hasLexiaInternalId"])
                            if isLexiaCompany in companies:
                                update_company(isLexiaCompany, lexia_id,
                                               None)
                        # --------------- ACTORS
                        if row[14]:
                            actors = row[14].split(" / ")
                            for actor in actors:
                                # Generates actor id
                                actor_id = id.generate(actor)
                                # Links only actors that are known authors
                                if actor_id in allAuthors:
                                    update_actor(actor_id, book_id)
                    line += 1
        except Exception as err:
            print("FAIL: start.py", err, line, csv_file)
            raise SystemExit(0)
    # ------------------------------------------
    # Adds non-authors which do not have entries
    try:
        with open(non_authors) as a:
            csv_reader = csv.reader(a, delimiter=";")
            # line number in csv file
            line2 = 0
            for row in csv_reader:
                # Skip first row with column title
                if line2 != 0:
                    # Multiple names of authors
                    names = row[2].split(" / ")
                    # Iterates through the names per entry
                    for name in names:
                        # Generates author id
                        author_id = id.generate(name)
                        # Checks if author_id is valid
                        if author_id not in allAuthors:
                            print("FAIL Non-Author", author_id, line2,
                                  non_authors)
                            raise SystemExit(0)
                        # Creates author if it does not exist
                        if author_id not in authors:
                            create_author(author_id)
                line2 += 1
    except Exception as err:
        print("FAIL: non_authors.csv")
        raise SystemExit(0)
    # ------------------------------------------
    # Adds non-venues which are not linked in the regular entries
    try:
        with open(non_venues) as v:
            csv_reader = csv.reader(v, delimiter=";")
            # line number in csv file
            line3 = 0
            for row in csv_reader:
                # Skip first row with column title
                if line3 != 0:
                    ven_names = row[12].split(" / ")
                    for ven_name in ven_names:
                        ven_data, type_2 = comp_ven.info(
                            ven_name, line3, non_venues)
                        if type_2 == "venue":
                            unique_key = "{} {}".format(
                                ven_data["hasVenueInternalId"],
                                ven_data["hasPlaceVenue"])
                            venue_id = id.generate(unique_key)
                            if venue_id not in allVenues:
                                print("FAIL Non-Venues", venue_id, line3,
                                      non_venues)
                                raise SystemExit(0)
                            if venue_id not in venues:
                                create_venue(venue_id, ven_data)
                line3 += 1
    except Exception as err:
        print("FAIL: non_venues.csv")
        raise SystemExit(0)
    # ------------------------------------------
    # Sets relation between human and company
    try:
        with open(human_company) as h:
            csv_reader = csv.reader(h, delimiter=';')
            # line number in csv file
            line4 = 0
            for row in csv_reader:
                # Skip first row with column title
                if line4 != 0:
                    # Generates id for human
                    human_id = id.generate(row[2])
                    # Checks if human_id is valid
                    # NOTE(review): the print below reports author_id (a
                    # leftover from the first loop) instead of human_id —
                    # confirm
                    if human_id not in allAuthors:
                        print("FAIL human_company -> human", author_id,
                              line4, human_company)
                        raise SystemExit(0)
                    comp_names = row[12].split(" / ")
                    for comp_name in comp_names:
                        # NOTE(review): passes 'line' (left over from the
                        # first loop) instead of line4 — confirm intent
                        comp_data, type_3 = comp_ven.info(
                            comp_name, line, human_company)
                        if type_3 == "company":
                            # Generates id for company
                            company_id = id.generate(
                                comp_data["hasCompanyInternalId"])
                            # Checks if company_id is valid
                            if company_id not in allCompanies:
                                print("FAIL human_company -> company",
                                      company_id, line, human_company)
                                raise SystemExit(0)
                            # Creates company if it does not exist
                            if company_id not in companies:
                                create_company(company_id, comp_data)
                            # Creates author if it does not exist
                            if human_id not in authors:
                                create_author(human_id)
                            # Adds the human to the company
                            update_company(company_id, None, human_id)
                line4 += 1
    except Exception as err:
        print("FAIL: human_company.csv")
        raise SystemExit(0)
    # ------------------------------------------
    # Saves the objects which occur in the csv files into json files
    json.save(json_files[0], authors)
    json.save(json_files[1], books)
    json.save(json_files[2], passages)
    json.save(json_files[3], contributors)
    json.save(json_files[4], lexias)
    json.save(json_files[5], companies)
    json.save(json_files[6], venues)
    # Checks all the cardinality constraints
    check_persons()
    check_books()
    check_passages()
    check_lexias()
    check_companies()
    check_venues()
def update_book(b_id, auth_names, pub_info, ven_id, comp_id, gen, sub, lex_id, com):
    """Merge optional pieces of information into books[b_id].

    Every parameter except b_id may be None/falsy, in which case the
    corresponding update is skipped. List-valued properties
    (isWrittenBy, performedIn, performedBy, hasGenre, hasSubject) are
    kept duplicate-free; hasEdition and hasBookComment are only set
    once.
    """
    book = books[b_id]
    if auth_names:
        # Iterates through the author names per entry.
        for auth_name in auth_names:
            auth_id = id.generate(auth_name)
            if auth_id not in authors:
                # Should not normally happen: authors are created upfront.
                print("Strange UPDATE BOOK")
                create_author(auth_id)
            # BUG FIX: the add was previously only in the else-branch, so
            # a freshly created author was never linked to the book
            # (inconsistent with update_sec_book, which always links).
            temp = set(book["isWrittenBy"])
            temp.add(auth_id)
            book["isWrittenBy"] = list(temp)
    if pub_info:
        # First edition info wins; later ones are ignored.
        if "pubInfo" in pub_info and "hasEdition" not in book:
            book["hasEdition"] = pub_info["pubInfo"]
    if ven_id:
        temp = set(book.setdefault("performedIn", []))
        temp.add(ven_id)
        book["performedIn"] = list(temp)
    if comp_id:
        temp = set(book.setdefault("performedBy", []))
        temp.add(comp_id)
        book["performedBy"] = list(temp)
    # "Theatre" is deliberately excluded from the stored genres.
    if gen and gen != "Theatre":
        temp = set(book.setdefault("hasGenre", []))
        temp.add(gen)
        book["hasGenre"] = list(temp)
    if sub:
        temp = set(book.setdefault("hasSubject", []))
        temp.add(sub)
        book["hasSubject"] = list(temp)
    if lex_id:
        book["isLexiaBook"] = lex_id
    if com:
        # First book comment wins; later ones are ignored.
        if "hasBookComment" in com and "hasBookComment" not in book:
            book["hasBookComment"] = com["hasBookComment"]
def prepare():
    """Load all primary books from the 'linecategories' table.

    Row names look like "@123456[_Ayy[-yy]][_Dyy[-yy]] Title" where _A
    carries performance dates and _D publication dates; rows marked
    "SEC - ..." (secondary literature) are skipped here. Returns
    {book_id: book_dict} or None when the database access fails.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM linecategories")
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Key of the book object is {internalID title}.
        all_books = {}
        for row in results:
            books = re.search(r"(@\d{6})(.*)", row["name"])
            if books:
                book = {}
                # Secondary literature is handled by the sec-book script.
                sec = re.search(r"\sSEC\s\-\s(.*)", books.group(2))
                if not sec:
                    # _A = performance date(s), _D = publication date(s).
                    only_a = "_A" in books.group(2)
                    only_d = "_D" in books.group(2)
                    if only_a and only_d:
                        dates_a_d = re.search(
                            r"(_A((\d{2})-(\d{2})|(\d{2})))(_D((\d{2})-(\d{2})|(\d{2}))) (.*)",
                            books.group(2))
                        if not dates_a_d:
                            print(
                                "Fail - prep_books.py: Wrong pattern (1) in book title"
                            )
                            raise SystemExit(0)
                        book = add_performance_dates(dates_a_d.group(3),
                                                     dates_a_d.group(4),
                                                     dates_a_d.group(5),
                                                     book)
                        book = add_publish_dates(dates_a_d.group(8),
                                                 dates_a_d.group(9),
                                                 dates_a_d.group(10), book)
                        book = pref.get_prefix_book(dates_a_d.group(11), book)
                    elif only_a and not only_d:
                        dates_a = re.search(
                            r"(_A((\d{2})-(\d{2})|(\d{2})))(.*)",
                            books.group(2))
                        if not dates_a:
                            print(
                                "Fail - prep_books.py: Wrong pattern (2) in book title"
                            )
                            raise SystemExit(0)
                        book = add_performance_dates(dates_a.group(3),
                                                     dates_a.group(4),
                                                     dates_a.group(5), book)
                        book = pref.get_prefix_book(
                            dates_a.group(6).strip(), book)
                    elif only_d and not only_a:
                        dates_d = re.search(
                            r"(_D((\d{2})-(\d{2})|(\d{2})))(.*)",
                            books.group(2))
                        if not dates_d:
                            print(
                                "Fail - prep_books.py: Wrong pattern (3) in book title"
                            )
                            raise SystemExit(0)
                        book = add_publish_dates(dates_d.group(3),
                                                 dates_d.group(4),
                                                 dates_d.group(5), book)
                        book = pref.get_prefix_book(
                            dates_d.group(6).strip(), book)
                    else:
                        # No date markers at all: the whole rest is the title.
                        book = pref.get_prefix_book(
                            books.group(2).strip(), book)
                    if "hasBookTitle" in book:
                        # Adding internal ID.
                        book["hasBookInternalId"] = books.group(1)
                        # Create a key which has the following format
                        # {internalID (article) title}.
                        if "hasPrefixBookTitle" in book:
                            unique_key = "{} {} {}".format(
                                book["hasBookInternalId"],
                                book["hasPrefixBookTitle"],
                                book["hasBookTitle"])
                        else:
                            unique_key = "{} {}".format(
                                book["hasBookInternalId"],
                                book["hasBookTitle"])
                        # ID contains prefix and a 16-char hexadecimal hash.
                        book_id = id.generate(unique_key)
                        # Adding ID of SQL table.
                        book["sql"] = row["id"]
                        all_books[book_id] = book
                    else:
                        print("prep_books.py: FAIL - _A or _D not found",
                              books.group(2))
        return all_books
    except Exception as err:
        # NOTE(review): errors are only printed; callers receive None.
        print(err)
def prepare():
    """Load all authors from the HAMLET MySQL database.

    Parses birth ("b. ..."), death ("d. ..."), floruit ("fl. ...") and
    birth-death span dates out of the free-text description column; a
    leading '!' suppresses date parsing and a '*' marks a female
    author. Returns {author_id: author_dict}; terminates the program
    via SystemExit on any error.
    """
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            cursor.execute("SELECT * FROM authors ORDER BY lastname")
            results = cursor.fetchall()
            # Close cursor before connection (original order was reversed).
            cursor.close()
        finally:
            conn.close()

        # Key of the author object is {firstName lastName}.
        all_authors = {}
        for row in results:
            author = {
                "hasFirstName": row["firstname"],
                "hasLastName": row["lastname"]
            }
            # Start with empty description.
            description = ""
            if row["description"]:
                # Ignore the dates when the description starts with '!'.
                stop = re.search(r"!(.*)", row["description"])
                # Birth date.
                birth = re.search(r"(.*)b\.\s(.*)", row["description"])
                # Death date.
                death = re.search(r"(.*)d\.\s(.*)", row["description"])
                # Floruit date.
                floruit = re.search(r"(.*)fl\.\s(.*)", row["description"])
                # Birth-death span; each side is exact or "[from-to]".
                birthDeath = re.search(
                    r"(.*?)(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))-(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                    row["description"])
                # A '*' marks female authors.
                female = re.search(r"(\*)(.*)", row["description"])
                if stop:
                    # Dates stay verbatim in the description.
                    description = stop.group(1)
                elif birth:
                    birth_span = re.search(
                        r"(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                        birth.group(2))
                    # NOTE(review): birth_span can be None when "b." is
                    # not followed by digits; the AttributeError then
                    # aborts via the outer except (original behavior kept).
                    if birth_span.group(4) is None:
                        # Span form "[from-to]".
                        author["birthStart"] = birth_span.group(2)
                        author["birthEnd"] = birth_span.group(3)
                    else:
                        author["birthExact"] = birth_span.group(1)
                    # Add description without the date part.
                    description = birth.group(1) + birth_span.group(5)
                elif death:
                    death_span = re.search(
                        r"(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                        death.group(2))
                    if death_span.group(4) is None:
                        author["deathStart"] = death_span.group(2)
                        author["deathEnd"] = death_span.group(3)
                    else:
                        author["deathExact"] = death_span.group(1)
                    # Add description without the date part.
                    description = death.group(1) + death_span.group(5)
                elif floruit:
                    floruit_span = re.search(
                        r"((\d{1,4})-(\d{1,4})|(\d{1,4}))(.*)",
                        floruit.group(2))
                    if floruit_span is not None:
                        if floruit_span.group(4) is None:
                            author["activeStart"] = floruit_span.group(2)
                            author["activeEnd"] = floruit_span.group(3)
                        else:
                            author["activeExact"] = floruit_span.group(1)
                        # Add description without the date part.
                        description = floruit.group(1) + floruit_span.group(5)
                elif birthDeath:
                    if birthDeath.group(3):
                        author["birthStart"] = birthDeath.group(3)
                        author["birthEnd"] = birthDeath.group(4)
                    else:
                        author["birthExact"] = birthDeath.group(5)
                    if birthDeath.group(7):
                        author["deathStart"] = birthDeath.group(7)
                        author["deathEnd"] = birthDeath.group(8)
                    else:
                        author["deathExact"] = birthDeath.group(9)
                    # Add description without the date part.
                    description = birthDeath.group(1) + birthDeath.group(10)
                else:
                    # All other descriptions which do not follow the
                    # defined rules are kept as-is.
                    description = row["description"]
                # Skip whitespace-only / empty leftovers.
                if description.strip():
                    # Trim and remove the '*' gender marker.
                    author["hasDescription"] = description.rstrip().replace(
                        '*', '')
                # Add sex of author.
                if female:
                    author["hasGender"] = "female"
                else:
                    author["hasGender"] = "male"
            # Create a key which has the format "{firstName} {lastName}".
            unique_key = "{} {}".format(author["hasFirstName"],
                                        author["hasLastName"])
            # ID contains prefix and a 16-char hexadecimal hash.
            author_id = id.generate(unique_key)
            # Adding ID of SQL table.
            author["sql"] = row["id"]
            all_authors[author_id] = author
        return all_authors
    except Exception as err:
        print(err)
        raise SystemExit(0)