def prepare():
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM contributors"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the contributors from hyperhamlet. The key of the contributor dict is {lastName firstName}
        all_contributors = {}

        for row in results:

            contributor = {
                "hasFirstName": row["firstname"].strip(),
                "hasLastName": row["lastname"].strip(),
                "hasGender": "male"
            }

            # The email is assigned to the description after the '*' has been removed from the string
            if row["email"]:
                contributor["hasDescription"] = row["email"].replace('*', '')

                # Sets the gender to female if the email is flagged with '*'
                female = re.search(r"(\*)(.*)", row["email"])
                if female:
                    contributor["hasGender"] = "female"

            # Create a key which has the following format {lastName firstName}
            unique_key = "{} {}".format(contributor["hasLastName"],
                                        contributor["hasFirstName"])

            # Creates the id with the key from above. The ID contains a prefix and a 16-character hexadecimal hash
            contributor_id = id.generate(unique_key)

            # Adding ID of SQL table
            contributor["sql"] = row["id"]

            # Adding the contributor to the all_contributors dict
            all_contributors[contributor_id] = contributor

        cursor.close()
        conn.close()

        return all_contributors

    except Exception as err:
        print(err)
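
# --- Illustration (not part of the original module) -------------------------
# A minimal sketch of what the id.generate() helper used above could look like,
# assuming it derives the "prefix + 16-character hexadecimal hash" ids that the
# comments describe from a stable hash of the unique key. The prefix value and
# the hash algorithm are assumptions; the real module may differ.
import hashlib


def generate_id_sketch(unique_key, prefix="X"):
    # Hash the key and keep the first 16 hex characters so that the same key
    # always yields the same id
    digest = hashlib.sha256(unique_key.encode("utf-8")).hexdigest()
    return "{}{}".format(prefix, digest[:16])


# Example: generate_id_sketch("Shakespeare William") -> "X" followed by 16 hex digits
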
def prepare():
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM linecategories"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the secondary books from hyperhamlet. Key of the book object is {internalID}
        all_sec_books = {}

        for row in results:

            books = re.search(r"(@\d{6})\s(.*)", row["name"])

            if books:

                book = {}
                sec = re.search(r"SEC\s\-\s(.*)", books.group(2))

                if sec:
                    # id of the linecategory row in table
                    # print("SEC ID: " + books.group(1) + " | SEC TITLE: " + sec.group(1))
                    book["hasBookInternalId"] = books.group(1)
                    book = pref.get_prefix_book(sec.group(1), book)

                    # Create a key which has the following format {internalID}
                    unique_key = book["hasBookInternalId"]

                    # Creates the id with the key from above. The ID contains a prefix and a 16-character hexadecimal hash
                    book_id = id.generate(unique_key)

                    # Adding ID of SQL table
                    # book["sql"] = row["id"]

                    # Adding the book to the all_sec_books dict
                    all_sec_books[book_id] = book

        cursor.close()
        conn.close()

        return all_sec_books

    except Exception as err:
        print(err)
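
# --- Illustration (not part of the original module) -------------------------
# Shows how the two regexes above split a secondary-literature linecategory
# name into its internal id and title. The sample value is invented for
# illustration only.
import re


def _illustrate_sec_name_parsing():
    sample_name = "@123456 SEC - A Study of Hamlet Allusions"
    books = re.search(r"(@\d{6})\s(.*)", sample_name)
    if books:
        # books.group(1) == "@123456"
        # books.group(2) == "SEC - A Study of Hamlet Allusions"
        sec = re.search(r"SEC\s\-\s(.*)", books.group(2))
        if sec:
            # sec.group(1) == "A Study of Hamlet Allusions"
            print(books.group(1), "|", sec.group(1))
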
def update_sec_book(sec_b_id, auth_names):
    # Iterates through the names per entry
    for auth_name in auth_names:

        # Generates author id
        auth_id = id.generate(auth_name)

        # Checks if author already exists
        if auth_id not in authors:
            create_author(auth_id)

        temp = set(books[sec_b_id]["isWrittenBy"])
        temp.add(auth_id)
        books[sec_b_id]["isWrittenBy"] = list(temp)
def prepare():
    try:
        conn = pymysql.connect(host='localhost', port=8889, user='******', password='******', database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM generals"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the venues from hyperhamlet. The key of the venue dict is {venueInternalId venuePlace}
        all_venues = {}

        for row in results:

            if row["parent"] == root_node_id:

                venues = re.search(r"^(#\d{6})\s(.*)_(.*)", row["name"])

                venue = {}

                if venues:
                    venue["venueInternalId"] = venues.group(1)
                    venue["hasPlaceVenue"] = venues.group(3)

                    # Create a key which has the following format {venueInternalId venuePlace}
                    unique_key = "{} {}".format(venue["venueInternalId"], venue["hasPlaceVenue"])

                    # Creates the id with the key from above. The ID contains a prefix and a 16-character hexadecimal hash
                    venue_id = id.generate(unique_key)

                    # Adding the venue to the all_venues dict
                    all_venues[venue_id] = venue

        cursor.close()
        conn.close()

        return all_venues

    except Exception as err:
        print(err)
def prepare():
    try:
        conn = pymysql.connect(host='localhost', port=8889, user='******', password='******', database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM generals"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the companies from hyperhamlet. The key of the company dict is {companyInternalId}
        all_companies = {}

        for row in results:
            if row["parent"] == root_node_id:

                companies = re.search(r"^(#\d{6})\s(.*)", row["name"])

                company = {}

                if companies:
                    company["hasCompanyInternalId"] = companies.group(1)
                    company["hasCompanyTitle"] = companies.group(2)

                    # Creates the id from the internal id. The ID contains a prefix and a 16-character hexadecimal hash
                    company_id = id.generate(company["hasCompanyInternalId"])

                    # Adding the company to the all_companies dict
                    all_companies[company_id] = company

        cursor.close()
        conn.close()

        return all_companies

    except Exception as err:
        print(err)
def prepare():
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = 'SELECT * FROM modifications'

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the lexias from hyperhamlet. Key of the lexia object is {internalID title}
        all_lexia = {}

        for row in results:

            lexia = ed.info(row["name"], None, None)

            if lexia:
                # Create a key which has the following format {lexiaTitle lexiaInternalId}
                unique_key = "{} {}".format(lexia["hasLexiaTitle"],
                                            lexia["hasLexiaInternalId"])

                lexia_id = id.generate(unique_key)
                all_lexia[lexia_id] = lexia

        cursor.close()
        conn.close()

        return all_lexia

    except Exception as err:
        print(err)
def create_sec_passage(sec_pa_id, pag, sec_bo):
    passage = {
        "hasText": "-",
        "hasDisplayedTitle": sec_bo["hasDisplayedTitle"],
        "hasResearchField": "Reading",
        "hasFunctionVoice": ["Not defined"],
        "hasMarking": ["Unmarked"],
        "hasStatus": "public",
        "occursIn": []
    }

    if pag:
        passage["hasPage"] = pag

    if "hasPrefixDisplayedTitle" in sec_bo:
        passage["hasPrefixDisplayedTitle"] = sec_bo["hasPrefixDisplayedTitle"]

    # Set Regula Hohl Trillini as default contributor
    id_rh = id.generate("Hohl-Trillini Regula")
    if id_rh in allContributors:
        passage["wasContributedBy"] = id_rh

    passages[sec_pa_id] = passage
def prepare_csv():
    try:
        with open("01_prepare_scripts/csv/sec.csv") as f:
            csv_reader = csv.reader(f, delimiter=';')

            # line number in csv file
            line = 0

            # Contains all the secondary books from hyperhamlet. Key of the book object is {internalID}
            all_sec_books = {}

            for row in csv_reader:

                # Skip first row with column title
                if line != 0:
                    sec_book = {}

                    sec_books = re.search(r"(@\d{6})\sSEC\s\-\s(.*)", row[13])
                    if sec_books:
                        sec_book["hasBookInternalId"] = sec_books.group(1)
                        sec_book["hasLanguage"] = row[9]
                        # Gets book title and its prefix
                        sec_book = pref.get_prefix_book(
                            sec_books.group(2), sec_book)
                        # Gets the displayed title and its prefix (the same for all the sec passages)
                        sec_book = pref.get_prefix_passage(row[3], sec_book)

                        s = ed.info(row[4])
                        sec_book["pubInfo"] = s["pubInfo"]

                        if row[5] and row[6]:
                            sec_book[
                                "hasCreationDate"] = "GREGORIAN:{}:{}".format(
                                    row[5], row[6])
                            sec_book[
                                "hasPublicationDate"] = "GREGORIAN:{}:{}".format(
                                    row[5], row[6])

                        if row[18]:
                            sec_book["hasGenre"] = []
                            sec_book["hasGenre"].append(row[18])

                        # Multiple subjects
                        if row[19]:
                            sec_book["hasSubject"] = row[19].split(" / ")

                        # ---------- AUTHOR
                        # Multiple names of authors
                        names = row[2].split(" / ")

                        authors = []

                        # Iterates through the names per row/ entry
                        for name in names:

                            # Checks if author name is invalid
                            if not name:
                                print("FAIL Author in SEC", line)
                                raise SystemExit(0)

                            authors.append(name.strip())

                        sec_book["authors"] = authors

                        # Creates the id from the internal id. The ID contains a prefix and a 16-character hexadecimal hash
                        sec_book_id = id.generate(
                            sec_book["hasBookInternalId"])

                        # Adding the book to the all_sec_books dict
                        all_sec_books[sec_book_id] = sec_book
                    else:
                        print("FAIL prep_sec_books.py", line, row[13])
                        raise SystemExit(0)

                line += 1

        return all_sec_books

    except Exception as err:
        print("FAIL: prep_sec_books.py", err, row[13])
        raise SystemExit(0)
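
# --- Illustration (not part of the original module) -------------------------
# A minimal sketch of what a helper like pref.get_prefix_book() might do,
# assuming it splits a leading article off the title and stores it in
# "hasPrefixBookTitle" while the rest goes into "hasBookTitle". This is an
# assumption for illustration; the real pref module may behave differently.
def get_prefix_book_sketch(title, book):
    for article in ("The ", "A ", "An "):
        if title.startswith(article):
            # Keep the article separately so titles can be sorted without it
            book["hasPrefixBookTitle"] = article.strip()
            book["hasBookTitle"] = title[len(article):].strip()
            return book
    book["hasBookTitle"] = title.strip()
    return book


# Example: get_prefix_book_sketch("The Tragedy of Hamlet", {})
# -> {"hasPrefixBookTitle": "The", "hasBookTitle": "Tragedy of Hamlet"}
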
def start():
    # Reads the csv files
    for csv_file in csv_files:

        try:
            with open(csv_file) as f:
                csv_reader = csv.reader(f, delimiter=';')

                # line number in csv file
                line = 0

                for row in csv_reader:

                    # Skip first row with column title
                    if line != 0:

                        # ---------- AUTHOR
                        # Multiple names of authors
                        names = row[2].split(" / ")

                        # Iterates through the names per entry
                        for name in names:

                            # Generates author id
                            author_id = id.generate(name)

                            # Checks if author_id is valid
                            if author_id not in allAuthors:
                                print("FAIL Author", author_id, line, csv_file)
                                raise SystemExit(0)

                            # Creates author if it does not exist
                            if author_id not in authors:
                                create_author(author_id)

                        # ---------- BOOK
                        # Extracts internal id and title
                        key_id, key_title = prep_books.get_key_for_id(row[13])

                        # Generates book id
                        if key_id and key_title:
                            book_id = id.generate("{} {}".format(
                                key_id, key_title))
                        else:
                            print("FAIL Book Title & ID", row[13], line,
                                  csv_file)
                            raise SystemExit(0)

                        # Checks if book_id is valid
                        if book_id not in allBooks:
                            print("FAIL Book", book_id, row[13], line,
                                  csv_file)
                            raise SystemExit(0)

                        publication = ed.info(row[4])
                        publication_original = ed.info(row[26])

                        # Creates the book if it's new
                        if book_id not in books:
                            create_book(book_id, row, publication,
                                        publication_original,
                                        allBooks[book_id])

                        # Comments
                        comments = comment.info(row[11])

                        # Updates the author references, genre and comment
                        update_book(book_id, names, publication, None, None,
                                    row[18], None, None, comments)

                        # ------------- PASSAGE

                        # ##### Variant 1 (old) #####
                        # Generates passage id
                        # passage_id = id.generate(row[10])

                        # ##### Variant 2 (new) #####
                        # Generates passage id with random generator
                        unique_key = random.randint(1000000, 9999999)
                        passage_id = id.generate(str(unique_key))

                        # Extracts displayed title and prefix
                        pass_title = pref.get_prefix_passage(row[3], {})

                        # Creates the passage and updates the edition reference
                        if passage_id not in passages:
                            create_passage(passage_id, pass_title, row[10],
                                           row[25], publication,
                                           publication_original)

                        # Updates the book reference and passage comment
                        update_passage(passage_id, book_id, None, None, None,
                                       None, None, None, comments)

                        # Multiple subjects
                        subjects = row[19].split(" / ")

                        for subject in subjects:
                            update_book(book_id, None, None, None, None, None,
                                        subject, None, None)

                        # Function voice
                        f_voices = row[21].split(" / ")

                        for f_voice in f_voices:

                            if f_voice == "BODY OF TEXT" and row[20]:
                                narratives = row[20].split(" / ")

                                for narrative in narratives:
                                    update_passage(passage_id, None, None,
                                                   None, None, None, narrative,
                                                   None, None)
                            elif f_voice == "BODY OF TEXT" and not row[20]:
                                update_passage(passage_id, None, None, None,
                                               None, None, "Not defined", None,
                                               None)
                            elif not f_voice and row[20]:
                                print("FAIL - No function but narrative",
                                      row[20], csv_file, line)
                                raise SystemExit(0)
                            else:
                                update_passage(passage_id, None, None, None,
                                               None, None, f_voice, None, None)

                        # Evaluates markings
                        # Checks if column is empty and sets unmarked
                        work_value = "Work unmarked" if not row[16] else row[16]
                        author_value = "Author unmarked" if not row[
                            17] else row[17]

                        if (work_value == "Work unmarked"
                                and author_value == "Author unmarked"
                                and not row[22]) or (
                                    work_value == "Work unmarked"
                                    and author_value == "Author unmarked"
                                    and row[22] == "Local reference"):
                            update_passage(passage_id, None, None, None, None,
                                           None, None, "Unmarked", None)
                        else:

                            if work_value != "Work unmarked":
                                # Checks if work marking has multiple values
                                if " / " in work_value:
                                    work_marked = row[16].split(" / ")

                                    for w_m in work_marked:
                                        update_passage(passage_id, None, None,
                                                       None, None, None, None,
                                                       w_m, None)
                                else:
                                    update_passage(passage_id, None, None,
                                                   None, None, None, None,
                                                   work_value, None)

                            if author_value != "Author unmarked":
                                update_passage(passage_id, None, None, None,
                                               None, None, None, author_value,
                                               None)

                            if row[22]:
                                for marking in pas.get_marking(row[22]):
                                    update_passage(passage_id, None, None,
                                                   None, None, None, None,
                                                   marking, None)

                        # ------------- CONTRIBUTOR
                        # Generates contributor id
                        contributor_id = id.generate(row[27].strip())

                        # Checks if contributor_id is valid
                        if contributor_id not in allContributors:
                            print("FAIL Contributor", contributor_id, csv_file,
                                  line)
                            raise SystemExit(0)

                        # Creates the contributor and updates the passage reference
                        if contributor_id not in contributors:
                            create_contributor(contributor_id)

                        # Updates the contributor reference
                        update_passage(passage_id, None, contributor_id, None,
                                       None, None, None, None, None)

                        # -------------- SECONDARY BOOK
                        sec_books = sec.info(row[24], line, csv_file)

                        # Sets the research field "Reading" when there are no sec books
                        if len(sec_books) == 0:
                            update_passage(passage_id, None, None, None, None,
                                           "Reading", None, None, None)

                        for sec_book in sec_books:

                            sec_book_id = id.generate(sec_book["id"])

                            # Checks if sec book id is valid
                            if sec_book_id not in allSecBooks:
                                print("FAIL Secondary Book", sec_book["id"],
                                      csv_file, line, row[24])
                                raise SystemExit(0)

                            s_book = allSecBooks[sec_book_id]

                            if sec_book_id not in books:
                                create_sec_book(sec_book_id, s_book)

                            # Sets the default research field
                            if "Fulltext database" in s_book["hasGenre"]:
                                update_passage(passage_id, None, None, None,
                                               None, "Electronic Search", None,
                                               None, None)

                            # Sets the research field
                            if not "Fulltext database" in s_book["hasGenre"]:
                                update_passage(passage_id, None, None, None,
                                               None, "Previous Research", None,
                                               None, None)

                            # Updates the authors reference
                            update_sec_book(sec_book_id, s_book["authors"])

                            # Creates sec passage id with random generator
                            unique_key = random.randint(100000, 999999)
                            sec_passage_id = id.generate(str(unique_key))

                            create_sec_passage(sec_passage_id,
                                               sec_book["page"], s_book)
                            update_sec_passage(sec_passage_id, sec_book_id)
                            update_passage(passage_id, None, None,
                                           sec_passage_id, None, None, None,
                                           None, None)

                        # --------------- COMPANY & VENUES
                        if row[12]:
                            comp_ven_names = row[12].split(" / ")

                            for comp_ven_name in comp_ven_names:
                                comp_ven_data, type_1 = comp_ven.info(
                                    comp_ven_name, line, csv_file)

                                if type_1 == "venue":
                                    unique_key = "{} {}".format(
                                        comp_ven_data["hasVenueInternalId"],
                                        comp_ven_data["hasPlaceVenue"])

                                    venue_id = id.generate(unique_key)

                                    if venue_id not in allVenues:
                                        print("FAIL Venue", venue_id, line,
                                              csv_file)

                                    if venue_id not in venues:
                                        create_venue(venue_id, comp_ven_data)

                                    # Updates the venue reference
                                    update_book(book_id, None, None, venue_id,
                                                None, None, None, None, None)

                                elif type_1 == "company":
                                    company_id = id.generate(
                                        comp_ven_data["hasCompanyInternalId"])

                                    if company_id not in allCompanies:
                                        print("FAIL Company", company_id, line,
                                              csv_file)

                                    if company_id not in companies:
                                        create_company(company_id,
                                                       comp_ven_data)

                                    # Updates the company reference
                                    update_book(book_id, None, None, None,
                                                company_id, None, None, None,
                                                None)

                        # --------------- LEXIA
                        # Multiple lexia names
                        lex_names = row[15].split(" / ")

                        for lex_name in lex_names:
                            le = lex.info(lex_name, line, csv_file)

                            # Creates a key which has the following format {title internalID}.
                            # {internalID title} will cause error later because it already exists
                            unique_key = "{} {}".format(
                                le["hasLexiaTitle"], le["hasLexiaInternalId"])

                            lexia_id = id.generate(unique_key)

                            if lexia_id not in allLexias:
                                print("FAIL Lexia", lexia_id, le, line,
                                      csv_file)
                                raise SystemExit(0)

                            if lexia_id not in lexias:
                                create_lexia(lexia_id, le)

                            # Updates the lexia reference
                            update_passage(passage_id, None, None, None,
                                           lexia_id, None, None, None, None)

                            isLexiaAuthor = id.generate(le["hasLexiaTitle"])
                            if isLexiaAuthor in authors:
                                # Internal ID of author must be overwritten because it is generated
                                # from incrementation and in this case the ID comes from the user
                                update_author(isLexiaAuthor,
                                              le["hasLexiaInternalId"],
                                              lexia_id)

                            key = "{} {}".format(le["hasLexiaInternalId"],
                                                 le["hasLexiaTitle"])
                            isLexiaBookVenue = id.generate(key)

                            if isLexiaBookVenue in books:
                                update_book(isLexiaBookVenue, None, None, None,
                                            None, None, None, lexia_id, None)

                            if isLexiaBookVenue in venues:
                                update_venue(isLexiaBookVenue, lexia_id)

                            isLexiaCompany = id.generate(
                                le["hasLexiaInternalId"])
                            if isLexiaCompany in companies:
                                update_company(isLexiaCompany, lexia_id, None)

                        # --------------- ACTORS
                        if row[14]:
                            actors = row[14].split(" / ")

                            for actor in actors:

                                # Generates actor id
                                actor_id = id.generate(actor)

                                # Checks if the actor exists among the authors
                                if actor_id in allAuthors:
                                    update_actor(actor_id, book_id)

                    line += 1

        except Exception as err:
            print("FAIL: start.py", err, line, csv_file)
            raise SystemExit(0)

    # ------------------------------------------
    # Adds non-authors which do not have entries
    try:
        with open(non_authors) as a:

            csv_reader = csv.reader(a, delimiter=";")

            # line number in csv file
            line2 = 0

            for row in csv_reader:

                # Skip first row with column title
                if line2 != 0:

                    # Multiple names of authors
                    names = row[2].split(" / ")

                    # Iterates through the names per entry
                    for name in names:

                        # Generates author id
                        author_id = id.generate(name)

                        # Checks if author_id is valid
                        if author_id not in allAuthors:
                            print("FAIL Non-Author", author_id, line2,
                                  non_authors)
                            raise SystemExit(0)

                        # Creates author if it does not exist
                        if author_id not in authors:
                            create_author(author_id)

                line2 += 1

    except Exception as err:
        print("FAIL: non_authors.csv")
        raise SystemExit(0)

    # ------------------------------------------
    # Adds non-venues which are not linked in the regular entries
    try:
        with open(non_venues) as v:

            csv_reader = csv.reader(v, delimiter=";")

            # line number in csv file
            line3 = 0

            for row in csv_reader:

                # Skip first row with column title
                if line3 != 0:

                    ven_names = row[12].split(" / ")

                    for ven_name in ven_names:
                        ven_data, type_2 = comp_ven.info(
                            ven_name, line3, non_venues)

                        if type_2 == "venue":
                            unique_key = "{} {}".format(
                                ven_data["hasVenueInternalId"],
                                ven_data["hasPlaceVenue"])

                            venue_id = id.generate(unique_key)

                            if venue_id not in allVenues:
                                print("FAIL Non-Venues", venue_id, line3,
                                      non_venues)
                                raise SystemExit(0)

                            if venue_id not in venues:
                                create_venue(venue_id, ven_data)

                line3 += 1

    except Exception as err:
        print("FAIL: non_venues.csv")
        raise SystemExit(0)

    # ------------------------------------------
    # Sets relation between human and company
    try:
        with open(human_company) as h:

            csv_reader = csv.reader(h, delimiter=';')

            # line number in csv file
            line4 = 0

            for row in csv_reader:

                # Skip first row with column title
                if line4 != 0:

                    # Generates id for human
                    human_id = id.generate(row[2])

                    # Checks if human_id is valid
                    if human_id not in allAuthors:
                        print("FAIL human_company -> human", human_id, line4,
                              human_company)
                        raise SystemExit(0)

                    comp_names = row[12].split(" / ")

                    for comp_name in comp_names:
                        comp_data, type_3 = comp_ven.info(
                            comp_name, line4, human_company)

                        if type_3 == "company":

                            # Generates id for company
                            company_id = id.generate(
                                comp_data["hasCompanyInternalId"])

                            # Checks if company_id is valid
                            if company_id not in allCompanies:
                                print("FAIL human_company -> company",
                                      company_id, line4, human_company)
                                raise SystemExit(0)

                            # Creates company if it does not exist
                            if company_id not in companies:
                                create_company(company_id, comp_data)

                            # Creates author if it does not exist
                            if human_id not in authors:
                                create_author(human_id)

                            # Adds the human to the company
                            update_company(company_id, None, human_id)

                line4 += 1

    except Exception as err:
        print("FAIL: human_company.csv")
        raise SystemExit(0)

    # ------------------------------------------
    # Saves the objects which occur in the csv files into json files
    json.save(json_files[0], authors)
    json.save(json_files[1], books)
    json.save(json_files[2], passages)
    json.save(json_files[3], contributors)
    json.save(json_files[4], lexias)
    json.save(json_files[5], companies)
    json.save(json_files[6], venues)

    # Checks all the cardinalities
    check_persons()
    check_books()
    check_passages()
    check_lexias()
    check_companies()
    check_venues()
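
# --- Illustration (not part of the original module) -------------------------
# Note that "json" above refers to a project helper module rather than the
# standard library (which has no save()); json.save() is assumed to dump a dict
# to the given file path. A minimal sketch under that assumption could look
# like this; the real helper may differ.
import json as _stdlib_json


def save_json_sketch(path, data):
    # Write the collected objects to disk as pretty-printed UTF-8 JSON
    with open(path, "w", encoding="utf-8") as out:
        _stdlib_json.dump(data, out, ensure_ascii=False, indent=2)
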
def update_book(b_id, auth_names, pub_info, ven_id, comp_id, gen, sub, lex_id,
                com):
    if auth_names:
        # Iterates through the names per entry
        for auth_name in auth_names:

            # Generates author id
            auth_id = id.generate(auth_name)

            # Checks if author already exists
            if auth_id not in authors:
                print("Strange UPDATE BOOK")
                create_author(auth_id)
            else:
                temp = set(books[b_id]["isWrittenBy"])
                temp.add(auth_id)
                books[b_id]["isWrittenBy"] = list(temp)

    if pub_info:
        if "pubInfo" in pub_info and not "hasEdition" in books[b_id]:
            books[b_id]["hasEdition"] = pub_info["pubInfo"]

    if ven_id:
        if "performedIn" not in books[b_id]:
            books[b_id]["performedIn"] = []

        temp = set(books[b_id]["performedIn"])
        temp.add(ven_id)
        books[b_id]["performedIn"] = list(temp)

    if comp_id:
        if "performedBy" not in books[b_id]:
            books[b_id]["performedBy"] = []

        temp = set(books[b_id]["performedBy"])
        temp.add(comp_id)
        books[b_id]["performedBy"] = list(temp)

    if gen:
        if gen != "Theatre":
            if "hasGenre" not in books[b_id]:
                books[b_id]["hasGenre"] = []

            temp = set(books[b_id]["hasGenre"])
            temp.add(gen)
            books[b_id]["hasGenre"] = list(temp)

    if sub:
        if "hasSubject" not in books[b_id]:
            books[b_id]["hasSubject"] = []

        temp = set(books[b_id]["hasSubject"])
        temp.add(sub)
        books[b_id]["hasSubject"] = list(temp)

    if lex_id:
        books[b_id]["isLexiaBook"] = lex_id

    if com:
        if "hasBookComment" in com and "hasBookComment" not in books[b_id]:
            books[b_id]["hasBookComment"] = com["hasBookComment"]
def prepare():
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM linecategories"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the books from hyperhamlet. Key of the book object is {internalID title}
        all_books = {}

        for row in results:

            books = re.search(r"(@\d{6})(.*)", row["name"])

            if books:

                book = {}

                sec = re.search(r"\sSEC\s\-\s(.*)", books.group(2))

                if not sec:

                    only_a = "_A" in books.group(2)
                    only_d = "_D" in books.group(2)

                    if only_a and only_d:
                        dates_a_d = re.search(
                            r"(_A((\d{2})-(\d{2})|(\d{2})))(_D((\d{2})-(\d{2})|(\d{2}))) (.*)",
                            books.group(2))

                        if not dates_a_d:
                            print(
                                "Fail - prep_books.py: Wrong pattern (1) in book title"
                            )
                            raise SystemExit(0)

                        book = add_performance_dates(dates_a_d.group(3),
                                                     dates_a_d.group(4),
                                                     dates_a_d.group(5), book)
                        book = add_publish_dates(dates_a_d.group(8),
                                                 dates_a_d.group(9),
                                                 dates_a_d.group(10), book)
                        book = pref.get_prefix_book(dates_a_d.group(11), book)

                    elif only_a and not only_d:
                        dates_a = re.search(
                            r"(_A((\d{2})-(\d{2})|(\d{2})))(.*)",
                            books.group(2))

                        if not dates_a:
                            print(
                                "Fail - prep_books.py: Wrong pattern (2) in book title"
                            )
                            raise SystemExit(0)

                        book = add_performance_dates(dates_a.group(3),
                                                     dates_a.group(4),
                                                     dates_a.group(5), book)
                        book = pref.get_prefix_book(
                            dates_a.group(6).strip(), book)

                    elif only_d and not only_a:
                        dates_d = re.search(
                            r"(_D((\d{2})-(\d{2})|(\d{2})))(.*)",
                            books.group(2))

                        if not dates_d:
                            print(
                                "Fail - prep_books.py: Wrong pattern (3) in book title"
                            )
                            raise SystemExit(0)

                        book = add_publish_dates(dates_d.group(3),
                                                 dates_d.group(4),
                                                 dates_d.group(5), book)
                        book = pref.get_prefix_book(
                            dates_d.group(6).strip(), book)

                    else:
                        book = pref.get_prefix_book(
                            books.group(2).strip(), book)

                    if "hasBookTitle" in book:

                        # Adding internal ID
                        book["hasBookInternalId"] = books.group(1)

                        # Create a key which has the following format {internalID (article) title}
                        if "hasPrefixBookTitle" in book:
                            unique_key = "{} {} {}".format(
                                book["hasBookInternalId"],
                                book["hasPrefixBookTitle"],
                                book["hasBookTitle"])
                        else:
                            unique_key = "{} {}".format(
                                book["hasBookInternalId"],
                                book["hasBookTitle"])

                        # Creates the id with the key from above. The ID contains a prefix and a 16-character hexadecimal hash
                        book_id = id.generate(unique_key)

                        # Adding ID of SQL table
                        book["sql"] = row["id"]

                        # Adding the book to the all_books dict
                        all_books[book_id] = book

                    else:
                        print("prep_books.py: FAIL - _A or _D not found",
                              books.group(2))

        cursor.close()
        conn.close()

        return all_books

    except Exception as err:
        print(err)
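
# --- Illustration (not part of the original module) -------------------------
# Shows how the "_A.._D.." pattern above is decomposed for a book title that
# carries both a performance date (_A) and a publication date (_D); each date
# is either an exact two-digit value or a "from-to" span. The sample value is
# invented for illustration only.
import re


def _illustrate_book_dates_parsing():
    sample = "_A99_D01-03 The Mousetrap"
    dates_a_d = re.search(
        r"(_A((\d{2})-(\d{2})|(\d{2})))(_D((\d{2})-(\d{2})|(\d{2}))) (.*)",
        sample)
    if dates_a_d:
        # dates_a_d.group(5) == "99"                 -> exact performance date
        # dates_a_d.group(8), group(9) == "01", "03" -> publication date span
        # dates_a_d.group(11) == "The Mousetrap"     -> (prefixed) book title
        print(dates_a_d.groups())
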
def prepare():
    try:
        conn = pymysql.connect(host='localhost',
                               port=8889,
                               user='******',
                               password='******',
                               database='HAMLET')

        cursor = conn.cursor(pymysql.cursors.DictCursor)

        sql = "SELECT * FROM authors ORDER BY lastname"

        cursor.execute(sql)

        results = cursor.fetchall()

        # Contains all the authors from hyperhamlet. Key of the author object is {firstName lastName}
        all_authors = {}

        for row in results:

            author = {
                "hasFirstName": row["firstname"],
                "hasLastName": row["lastname"]
            }

            # Start with empty description
            description = ""

            if row["description"]:

                # skip the date parsing if the description contains an exclamation mark
                stop = re.search("!(.*)", row["description"])
                # birth date
                birth = re.search(r"(.*)b\.\s(.*)", row["description"])
                # death date
                death = re.search(r"(.*)d\.\s(.*)", row["description"])
                # floruit date
                floruit = re.search(r"(.*)fl\.\s(.*)", row["description"])
                # birth and death dates as a span
                birthDeath = re.search(
                    r"(.*?)(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))-(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                    row["description"])
                # female authors (marked with '*')
                female = re.search(r"(\*)(.*)", row["description"])

                if stop:
                    # the dates (if any) stay in the description text and are not extracted
                    description = stop.group(1)

                elif birth:

                    birth_span = re.search(
                        r"(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                        birth.group(2))

                    if birth_span.group(4) is None:
                        author["birthStart"] = birth_span.group(2)
                        author["birthEnd"] = birth_span.group(3)
                        # print("Not Exact Birth", birth_span.groups(), row)
                    else:
                        author["birthExact"] = birth_span.group(1)
                        # print("Exact Birth", birth_span.groups(), row)

                    # add description
                    description = birth.group(1) + birth_span.group(5)

                elif death:

                    death_span = re.search(
                        r"(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                        death.group(2))

                    if death_span.group(4) is None:
                        author["deathStart"] = death_span.group(2)
                        author["deathEnd"] = death_span.group(3)
                        # print("Not Exact Death", death_span.groups(), row)
                    else:
                        author["deathExact"] = death_span.group(1)
                        # print("Exact Death", death_span.groups(), row)

                    # add description
                    description = death.group(1) + death_span.group(5)

                elif floruit:

                    floruit_span = re.search(
                        r"((\d{1,4})-(\d{1,4})|(\d{1,4}))(.*)",
                        floruit.group(2))

                    if floruit_span is not None:

                        # print(floruit_span.groups())

                        if floruit_span.group(4) is None:
                            author["activeStart"] = floruit_span.group(2)
                            author["activeEnd"] = floruit_span.group(3)
                            # print("Not Exact Floruit", floruit_span.groups(), row["description"])
                        else:
                            author["activeExact"] = floruit_span.group(1)
                            # print("Exact Floruit", floruit_span.groups(), row["description"])

                        # add description
                        description = floruit.group(1) + floruit_span.group(5)

                elif birthDeath:

                    if birthDeath.group(3):
                        author["birthStart"] = birthDeath.group(3)
                        author["birthEnd"] = birthDeath.group(4)
                        # print("Span Birth: ", birthDeath.groups(), row)
                    else:
                        author["birthExact"] = birthDeath.group(5)
                        # print("Exact Birth in Span: ", birthDeath.groups(), row)

                    if birthDeath.group(7):
                        author["deathStart"] = birthDeath.group(7)
                        author["deathEnd"] = birthDeath.group(8)
                        # print("Span Death: ", birthDeath.groups(), row)
                    else:
                        author["deathExact"] = birthDeath.group(9)
                        # print("Exact Death in Span: ", birthDeath.groups(), row)

                    # add description
                    description = birthDeath.group(1) + birthDeath.group(10)
                # all other descriptions which do not follow the defined rules
                else:
                    description = row["description"]

                # checks that the remaining description is not empty or whitespace only
                if description.strip():
                    # add the description to the author after trimming the string and removing the '*' (gender marker)
                    author["hasDescription"] = description.rstrip().replace(
                        '*', '')
                    # add gender of author
                    if female:
                        author["hasGender"] = "female"
                    else:
                        author["hasGender"] = "male"

            # Create a key which has the following format {firstName lastName}
            unique_key = "{} {}".format(author["hasFirstName"],
                                        author["hasLastName"])

            # Creates the id with the key from above. The ID contains a prefix and a 16-character hexadecimal hash
            author_id = id.generate(unique_key)

            # Adding ID of SQL table
            author["sql"] = row["id"]

            # Adding the author to the all_authors dict
            all_authors[author_id] = author

        cursor.close()
        conn.close()

        return all_authors

    except Exception as err:
        print(err)
        raise SystemExit(0)
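
# --- Illustration (not part of the original module) -------------------------
# Shows how the "b. <year>" rule above extracts an exact birth date while the
# remaining text is kept as the description; the leading '*' marks the author
# as female. The sample value is invented for illustration only.
import re


def _illustrate_author_description_parsing():
    sample_description = "*poet and translator, b. 1564"
    birth = re.search(r"(.*)b\.\s(.*)", sample_description)
    if birth:
        birth_span = re.search(r"(\[(\d{1,4})-(\d{1,4})\]|(\d{1,4}))(.*)",
                               birth.group(2))
        # birth_span.group(4) == "1564" -> stored as birthExact
        # birth.group(1) + birth_span.group(5) == "*poet and translator, "
        #   -> trimmed, '*' removed, and stored as hasDescription
        print(birth_span.group(4), birth.group(1) + birth_span.group(5))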