def query_database(query_type: str, query: str, database: str) -> (bool, dict):
    """
    Process a SELECT or UPDATE query

    :param query_type: A string = 'select' or 'update'
    :param query: The text of a SPARQL query
    :param database: The database (name) to be queried
    :return: A 2-tuple:
             - True if successful; False otherwise (an invalid query_type or any
               exception is reported through capture_error and yields False)
             - The query results if the query_type is 'select' and at least one
               binding was returned; An empty dictionary otherwise
    """
    logging.info(
        f'Querying database, {database}, using {query_type}, with query, {query}')
    if query_type not in ('select', 'update'):
        capture_error(f'Invalid query_type {query_type} for query_db', True)
        return False, dict()
    try:
        # The context manager closes the connection on every exit path
        # (normal return or exception) so that connections are not leaked
        with stardog.Connection(database, **sd_conn_details) as conn:
            if query_type == 'select':
                # Select query, which will return results, if successful
                query_results = conn.select(
                    query, content_type='application/sparql-results+json')
                if query_results['results']['bindings']:
                    return True, query_results
                # Successful query, but nothing matched
                return True, dict()
            # Update query; No results (either success or failure)
            conn.update(query)
            return True, dict()
    except Exception as e:
        capture_error(
            f'Database ({database}) query exception for {query}: {str(e)}',
            True)
        return False, dict()
def display_similarities(store_name: str):
    """
    Display a window to show 'similar' narratives.

    The analysis itself is not yet implemented: the window only shows a
    placeholder message and waits for the user to dismiss it.

    :param store_name: The database/data store name holding the narratives
    :return: TBD (currently None)
    """
    logging.info(f'Displaying similarities in {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Not yet implemented.", font=('Arial', 16))],
        [sg.Text("To exit, press 'End' or close the window.", font=('Arial', 16))],
        [sg.Text()],
        # The instructions above refer to an 'End' button, so one must exist
        # (matches the other placeholder windows in this file)
        [sg.Button('End', button_color='dark blue', size=(5, 1), font=('Arial', 14))],
    ]

    # Get the data for the window
    try:
        success, narrative_results = query_database('select', query_narrative_text, store_name)
        number_narratives = 0
        if success and 'results' in narrative_results and \
                'bindings' in narrative_results['results']:
            number_narratives = len(narrative_results['results']['bindings'])
        if not number_narratives:
            sg.popup_error(
                f'No narrators are defined in {store_name}. '
                f'Similarities graph cannot be displayed.',
                font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting narratives for similarity analysis from {store_name}: {str(e)}',
            True)
        return

    window_similarities_list = sg.Window('Narrative Similarities', layout,
                                         icon=encoded_logo).Finalize()

    # Event Loop to process window "events"
    while True:
        event_similarities_list, values = window_similarities_list.read()
        if event_similarities_list in (sg.WIN_CLOSED, 'End'):
            # If user closes window or clicks 'End'
            break
        # TODO

    # Done
    window_similarities_list.close()
    return
def display_hypotheses(store_name: str):
    """
    Show the (placeholder) hypotheses window for a data store.

    The hypotheses currently defined in the store are queried and counted, but
    the window itself only reports that the feature is not yet implemented and
    waits until the user presses 'End' or closes it.

    :param store_name: The database/data store name holding the hypotheses
    :return: TBD (currently None)
    """
    logging.info(f'Displaying hypotheses in {store_name}')

    # Window contents
    sg.theme('Material2')
    text_font = ('Arial', 16)
    not_implemented_row = [sg.Text("Not yet implemented.", font=text_font)]
    exit_hint_row = [sg.Text("To exit, press 'End' or close the window.", font=text_font)]
    end_button_row = [sg.Button('End', button_color='dark blue', size=(5, 1),
                                font=('Arial', 14))]
    layout = [not_implemented_row, exit_hint_row, [sg.Text()], end_button_row]

    # Retrieve the hypotheses (only counted for now)
    try:
        success, hypotheses_results = query_database('select', query_hypotheses, store_name)
        number_hypotheses = 0
        if success and 'results' in hypotheses_results:
            if 'bindings' in hypotheses_results['results']:
                number_hypotheses = len(hypotheses_results['results']['bindings'])
    except Exception as err:
        capture_error(
            f'Exception getting hypotheses details from {store_name}: {str(err)}',
            True)
        return

    window_hypotheses_list = sg.Window('Display Hypotheses', layout,
                                       icon=encoded_logo).Finalize()

    # Block on window events until the user dismisses the window
    finished = False
    while not finished:
        event_hypotheses_list, values = window_hypotheses_list.read()
        finished = event_hypotheses_list in (sg.WIN_CLOSED, 'End')

    window_hypotheses_list.close()
    return
def _load_directory_to_database(directory_name, conn):
    """
    Loads the DNA files to a new database/data store.

    Every file in the directory whose name ends with '.ttl' is added through
    the supplied connection. Any exception is reported through capture_error
    and is not re-raised.

    :param directory_name: String holding the directory name (with or without
                           a trailing path separator)
    :param conn: The connection to the Stardog DB for the database
    :return: None
    """
    try:
        for file_name in os.listdir(directory_name):
            if file_name.endswith('.ttl'):
                # os.path.join produces a valid path whether or not
                # directory_name ends with a separator
                conn.add(stardog.content.File(os.path.join(directory_name, file_name)))
    except Exception as e:
        capture_error(
            f'Exception loading ontologies from {directory_name}: {str(e)}',
            True)
def get_databases() -> list:
    """
    List the names of every database/store known to the Stardog server.

    :return: List of database/store names; An empty list if the server could
             not be queried (the exception is reported through capture_error)
    """
    logging.info('Getting a list of all databases')
    try:
        store_handles = stardog.Admin(**sd_conn_details).databases()
    except Exception as err:
        capture_error(f'Exception getting list of stores: {str(err)}', True)
        return []
    # Only the names are of interest to the callers
    return [handle.name for handle in store_handles]
def add_remove_data(op_type: str, triples: str, database: str, graph: str = '') -> bool:
    """
    Add or remove data to/from the database/store

    The triples are applied inside a single transaction. If anything fails,
    the transaction is rolled back and the exception is reported through
    capture_error.

    :param op_type: A string = 'add' or 'remove'
    :param triples: A string with the triples (Turtle) to be inserted/removed
    :param database: The database name
    :param graph: An optional named graph in which to insert/remove the triples
    :return: True if successful; False otherwise
    """
    # Validate first so that an invalid operation is not logged as a removal
    if op_type not in ('add', 'remove'):
        capture_error(f'Invalid op_type {op_type} for add_remove_graph', True)
        return False
    logging.info(
        f'Data {"added to" if op_type == "add" else "removed from"} {database}'
        f'{" and graph, " if graph else ""}{graph}')
    try:
        # The context manager closes the connection on every exit path
        with stardog.Connection(database, **sd_conn_details) as conn:
            conn.begin()
            try:
                operation = conn.add if op_type == 'add' else conn.remove
                # An empty graph name means the default graph, which is what
                # the graph_uri default of None selects
                operation(stardog.content.Raw(triples, 'text/turtle'),
                          graph_uri=graph or None)
                conn.commit()
            except Exception:
                # Do not leave a half-finished transaction open on the server
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    logging.warning(f'Rollback failed for {database}: {str(rollback_error)}')
                raise
        return True
    except Exception as e:
        capture_error(f'Database ({op_type}) exception: {str(e)}', True)
        return False
def add_narr_data_to_store(narrative: str, narr_metadata: dict, store_name: str):
    """
    Translate a narrative and its metadata into RDF (Turtle) triples and add
    them to the data store, for summary statistics and later analyses.

    :param narrative: String consisting of the full narrative text
    :param narr_metadata: Dictionary of metadata information - Keys are:
                          Source,Title,Person,Given,Surname,Maiden,Gender,Start,End,Remove,Header,Footer
    :param store_name: The database/data store name
    :return: None (Specified database/store is updated with the narrative text and
             metadata, translated into RDF)
    """
    # The narrator's/subject's identifier is built from the available name parts
    maiden_name = narr_metadata['Maiden']
    surname = narr_metadata['Surname']
    given_name = narr_metadata['Given']
    if not surname:
        narrator = f'{given_name}'
    elif maiden_name:
        narrator = f'{given_name} {maiden_name} {surname}'
    else:
        narrator = f'{given_name} {surname}'

    # References to the narrative and narrator in the db store
    title = narr_metadata["Title"]
    iri_narrator = narrator.replace(SPACE, EMPTY_STRING)

    # Triples describing the narrative and narrator/subject
    triples_list = [
        '@prefix : <urn:ontoinsights:dna:> .',
        f':{title} a :Narrative ; rdfs:label "{title}" ; :text "{narrative}" ; '
        f':subject :{iri_narrator} .',
        f':{iri_narrator} a :Person ; rdfs:label "{get_narrator_names(narr_metadata)}" .',
    ]
    gender_code = narr_metadata['Gender']
    if gender_code != 'U':
        # 'U' means that there is no gender to record
        triples_list.append(
            f':{iri_narrator} :has_agent_aspect {gender_dict[gender_code]} .')

    # Additional information - the subject's birth date and place
    birth_family_triples = get_birth_family_triples(narrative, narr_metadata['Given'],
                                                    iri_narrator)
    if birth_family_triples:
        triples_list += birth_family_triples

    # Add everything to the data store in a single request
    turtle_text = ' '.join(triples_list)
    try:
        add_remove_data('add', turtle_text, store_name)
    except Exception as err:
        capture_error(
            f'Exception adding narrative ({narr_metadata["Title"]}) triples to store: {str(err)}',
            True)
def create_delete_database(op_type: str, database: str) -> str:
    """
    Create or delete a database. If created, add the DNA ontologies.

    :param op_type: A string = 'create' or 'delete'
    :param database: The database name
    :return: Empty string if successful; Otherwise a description of the problem
             (an invalid op_type or the details of an exception)
    """
    logging.info(f'Database {database} being {op_type}d')
    if op_type not in ('create', 'delete'):
        invalid_op_text = f'Invalid op_type {op_type} for create_delete_db'
        capture_error(invalid_op_text, True)
        # An empty string means success to the callers, so the problem has to
        # be returned as well as reported
        return invalid_op_text
    try:
        # Context managers close the admin and database connections on every
        # exit path
        with stardog.Admin(**sd_conn_details) as admin:
            if op_type == 'create':
                # Create database
                admin.new_database(
                    database,
                    {
                        'search.enabled': True,
                        'edge.properties': True,
                        'reasoning': True,
                        'reasoning.punning.enabled': True,
                        'query.timeout': '20m',
                    })
                # Load ontologies to the newly created database
                with stardog.Connection(database, **sd_conn_details) as conn:
                    conn.begin()
                    logging.info(f'Loading DNA ontologies to {database}')
                    _load_directory_to_database(ontol_path, conn)
                    _load_directory_to_database(f'{ontol_path}domain-context/', conn)
                    conn.commit()
            else:
                # Delete database
                admin.database(database).drop()
        return ''
    except Exception as e:
        return f'Database ({op_type}) exception: {str(e)}'
'test_question'): display_popup_help(event) # New windows to process narratives elif event == 'From Existing Store': store_name = select_store() if store_name: success, count_results = query_database( 'select', query_number_narratives, store_name) if success and 'results' in count_results.keys( ) and 'bindings' in count_results['results'].keys(): count = int(count_results['results']['bindings'][0]['cnt'] ['value']) window['text-selected'].\ update(f'The data store, {store_name}, holds {count} narratives.') else: capture_error('The query for narrative count failed.', True) elif event == 'New, From CSV Metadata': store_name, count = ingest_narratives() if store_name: window['text-selected'].\ update(f'{count} narratives were added to the data store, {store_name}') elif event == 'Summary Statistics': if not store_name: sg.popup_error( "A narrative store must be loaded before selecting 'Summary Statistics'.", font=('Arial', 14), button_color='dark blue', icon=encoded_logo) else: display_statistics(store_name) elif event == 'Narrative Search/Display':
def display_narratives(store_name):
    """
    Display a list of all narratives in the specified store and allow selection of one.

    When a narrative is selected and 'OK' is pressed, its text is retrieved
    from the store and handed to _display_metadata and _display_timeline.

    :param store_name: The database/data store name
    :return: None (Narrative timeline is displayed)
    """
    logging.info('Narrative selection')

    # Get the narrative titles (keys) and their narrators (values)
    narrative_dict = dict()
    try:
        success, narrative_names = query_database('select', query_narratives, store_name)
        if success and 'results' in narrative_names and \
                'bindings' in narrative_names['results']:
            for binding in narrative_names['results']['bindings']:
                narrative_dict[binding['name']['value']] = \
                    binding['narrator']['value'].split(':')[-1]
    except Exception as e:
        capture_error(
            f'Exception getting narrative names from {store_name}: {str(e)}',
            True)
        return

    if not narrative_dict:
        # A single notification is enough: previously an error popup was
        # immediately followed by this one for the same condition
        sg.popup_ok(
            'No narratives were found in the store. '
            'Please ingest one or more using the "Load Narratives" button.',
            font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
        return
    narrative_list = list(narrative_dict.keys())

    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Select a narrative and then press 'OK'.", font=('Arial', 16))],
        [sg.Text("To exit without making a selection, press 'End' or close the window.",
                 font=('Arial', 16))],
        [sg.Listbox(narrative_list, size=(30, 10), key='narrative_list',
                    font=('Arial', 14), background_color='#fafafa',
                    highlight_background_color='light grey',
                    highlight_text_color='black', text_color='black')],
        [sg.Text()],
        [sg.Button('OK', button_color='dark blue', font=('Arial', 14), size=(5, 1)),
         sg.Button('End', button_color='dark blue', font=('Arial', 14), size=(5, 1))],
    ]

    # Create the GUI Window
    window_narrative_list = sg.Window('Select Narrative', layout,
                                      icon=encoded_logo).Finalize()

    # Event Loop to process window "events"; The finally clause guarantees that
    # the window is closed even when the loop is left through the early return
    try:
        while True:
            event_narrative_list, values = window_narrative_list.read()
            if event_narrative_list in (sg.WIN_CLOSED, 'End'):
                # If user closes window or clicks 'End'
                break
            if event_narrative_list != 'OK':
                continue
            if len(values['narrative_list']) != 1:
                sg.popup_error(
                    'Either no narrative was selected, or more than one was selected.',
                    font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
                continue
            narrative_name = values['narrative_list'][0]
            narrative_text = ''
            try:
                success1, narrative_text_results = query_database(
                    'select',
                    query_narrative_text.replace('narrative_name', narrative_name),
                    store_name)
                if success1 and 'results' in narrative_text_results and \
                        'bindings' in narrative_text_results['results']:
                    narrative_text = \
                        narrative_text_results['results']['bindings'][0]['text']['value']
                else:
                    sg.popup_error(
                        f'Error retrieving the text for the narrative, {narrative_name}, '
                        f'from {store_name}. The narrative details cannot be displayed.',
                        font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
            except Exception as e:
                capture_error(
                    f'Exception getting narrative text for {narrative_name} '
                    f'from {store_name}: {str(e)}', True)
                return
            if narrative_text:
                _display_metadata(narrative_name, narrative_dict[narrative_name],
                                  narrative_text, store_name)
                _display_timeline(narrative_name, narrative_text)
    finally:
        # Done
        window_narrative_list.close()
    return
def _display_metadata(narrative_name: str, narrator: str, narrative_text: str,
                      store_name: str):
    """
    Display a window with the metadata of a narrative's narrator (names,
    gender, birth country and birth year) together with the narrative text.

    Three queries (query_metadata1/2/3) are run with '?narrator' replaced by the
    narrator's identifier. Any detail that is not found is shown as 'Unknown'.

    :param narrative_name: The narrative title (shown in the window and its title bar)
    :param narrator: The narrator's identifier (substituted as ':<narrator>' in the queries)
    :param narrative_text: The full narrative text to be displayed
    :param store_name: The database/data store name
    :return: None (Window is displayed and left open)
    """
    logging.info(f'Displaying metadata and text for {narrative_name}')
    narrator_names = []
    # Defaults guarantee that every key used by the layout exists, even if a
    # query fails or returns nothing
    metadata_dict = {'gender': 'Unknown', 'country': 'Unknown', 'year': 'Unknown'}
    narrator_iri = f':{narrator}'
    try:
        # Names by which the narrator is known
        success1, metadata1_results = query_database(
            'select', query_metadata1.replace("?narrator", narrator_iri), store_name)
        if success1 and 'results' in metadata1_results and \
                'bindings' in metadata1_results['results']:
            for binding in metadata1_results['results']['bindings']:
                narrator_names.append(binding['name']['value'])

        # Birth country and year
        success2, metadata2_results = query_database(
            'select', query_metadata2.replace("?narrator", narrator_iri), store_name)
        if success2 and 'results' in metadata2_results and \
                'bindings' in metadata2_results['results']:
            for binding in metadata2_results['results']['bindings']:
                # There should only be one result / one set of metadata for the narrator
                metadata_dict['country'] = \
                    binding['country']['value'] if 'country' in binding else 'Unknown'
                metadata_dict['year'] = \
                    binding['year']['value'] if 'year' in binding else 'Unknown'

        # Gender (one of the narrator's 'aspects')
        success3, metadata3_results = query_database(
            'select', query_metadata3.replace("?narrator", narrator_iri), store_name)
        if success3 and 'results' in metadata3_results and \
                'bindings' in metadata3_results['results']:
            for binding in metadata3_results['results']['bindings']:
                aspect = binding['aspect']['value'].split(':')[-1]
                if aspect in ('Agender', 'Bigender', 'Female', 'Male'):
                    metadata_dict['gender'] = aspect

        if not (success1 or success2 or success3):
            sg.popup_error(
                f'Limited or no metadata was found for the narrator, {narrator.split(":")[-1]}. '
                f'At a minimum, the narrative text will be displayed.',
                font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting narrator details from {store_name}: {str(e)}',
            True)
        return

    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Narrative Title:", font=('Arial', 16)),
         sg.Text(narrative_name, font=('Arial', 16))],
        [sg.Text()],
        [sg.Text("Narrator Names:", font=('Arial', 16)),
         sg.Text(', '.join(narrator_names), font=('Arial', 16))],
        [sg.Text()],
        [sg.Text("Narrator Gender:", font=('Arial', 16)),
         sg.Text(metadata_dict['gender'], font=('Arial', 16))],
        [sg.Text("Narrator Birth Country:", font=('Arial', 16)),
         sg.Text(metadata_dict['country'], font=('Arial', 16))],
        [sg.Text("Narrator Birth Year:", font=('Arial', 16)),
         sg.Text(metadata_dict['year'], font=('Arial', 16))],
        [sg.Text()],
        [sg.Text("Text:", font=('Arial', 16))],
        [sg.Multiline(key='narr_text', font=('Arial', 14), size=(75, 30),
                      auto_refresh=True, autoscroll=True, background_color='#fafafa',
                      text_color='black', write_only=True)],
        [sg.Text()],
        [sg.Text("To exit, close the window.", font=('Arial', 16))],
    ]
    window_metadata_list = sg.Window(f'Metadata for {narrative_name}', layout,
                                     icon=encoded_logo).Finalize()
    # window_metadata_list['narr_text'].TKOut.output.config(wrap='word')
    window_metadata_list.FindElement('narr_text').Update(narrative_text)
    window_metadata_list.FindElement('narr_text').Widget.configure()
    # Non-blocking read: the window is drawn and control returns immediately,
    # leaving the window open
    window_metadata_list.read(timeout=0)
    return
def display_statistics(store_name: str):
    """
    Display a window with buttons to show various graphs and charts, and/or output
    files with the top xx 'unknown to the ontology' nouns and verbs.

    :param store_name: The database/data store name
    :return: None (Window is displayed)
    """
    logging.info(f'Displaying summary statistics for {store_name}')

    def show_error(message: str):
        # All error popups in this window share the same appearance
        sg.popup_error(message, font=('Arial', 14), button_color='dark blue',
                       icon=encoded_logo)

    def parse_count(raw_value, description: str):
        # Returns the integer held by an input field, or None (after informing
        # the user) if the field does not hold a whole number
        try:
            return int(raw_value)
        except ValueError:
            show_error(f'The {description} must be a whole number. Please correct the value.')
            return None

    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Click one or more of the buttons to display various summary statistics.",
                 font=('Arial', 16))],
        [sg.Text("To exit, press 'End' or close the window.", font=('Arial', 16))],
        [sg.Text()],
        [sg.Text("Narrator Characteristics:", font=('Arial', 16))],
        [sg.Button('Gender Details', font=('Arial', 14), button_color='dark blue',
                   size=(20, 1), pad=((25, 0), 3))],
        [sg.Button('Birth Year Details', font=('Arial', 14), button_color='dark blue',
                   size=(20, 1), pad=((25, 0), 3))],
        [sg.Button('Birth Country Details', font=('Arial', 14), button_color='dark blue',
                   size=(20, 1), pad=((25, 0), 3))],
        [sg.Text()],
        [sg.Text("Narrative Information:", font=('Arial', 16))],
        [sg.Button('Locations Mentioned', font=('Arial', 14), button_color='blue',
                   size=(24, 1), pad=((25, 0), 3))],
        [sg.Button('Years and Events Mentioned', font=('Arial', 14), button_color='blue',
                   size=(24, 1), pad=((25, 0), 3))],
        [sg.Text()],
        [sg.Text("Frequent Words:", font=('Arial', 16))],
        [sg.Button('Word Cloud', font=('Arial', 14), button_color='blue',
                   size=(24, 1), pad=((25, 0), 3)),
         sg.Text('Number of words:', font=('Arial', 16)),
         sg.InputText(text_color='black', background_color='#ede8e8', size=(5, 1),
                      font=('Arial', 16), key='words_in_cloud', do_not_clear=True)],
        [sg.Button('Output "Unknown" Nouns/Verbs', font=('Arial', 14), button_color='blue',
                   size=(24, 1), pad=((25, 0), 3)),
         sg.Text('Number of nouns:', font=('Arial', 16)),
         sg.InputText(text_color='black', background_color='#ede8e8', size=(5, 1),
                      font=('Arial', 16), key='nouns_in_csv', do_not_clear=True),
         sg.Text('Number of verbs:', font=('Arial', 16)),
         sg.InputText(text_color='black', background_color='#ede8e8', size=(5, 1),
                      font=('Arial', 16), key='verbs_in_csv', do_not_clear=True)],
        [sg.Text("Directory:", font=('Arial', 16), pad=((125, 0), 3)),
         sg.FolderBrowse(target='directory_name', button_color='dark blue'),
         sg.InputText(text_color='black', background_color='#ede8e8',
                      font=('Arial', 16), key='directory_name', do_not_clear=True)],
        [sg.Text("The files, 'Nouns.csv' and 'Verbs.csv', will be written to the "
                 "specified directory.", font=('Arial', 16))],
        [sg.Text("This processing takes SEVERAL MINUTES if a large number of narratives "
                 "are analyzed.", font=('Arial', 16))],
        [sg.Text()],
        [sg.Button('End', button_color='dark blue', size=(5, 1), font=('Arial', 14))],
    ]

    # Get the data for the window
    try:
        success1, number_narrators_results = query_database(
            'select', query_number_narrators, store_name)
        if success1 and 'results' in number_narrators_results and \
                'bindings' in number_narrators_results['results']:
            number_narrators = int(
                number_narrators_results['results']['bindings'][0]['cnt']['value'])
        else:
            show_error(f'No narrators are defined in {store_name}. '
                       f'Gender and birth details cannot be displayed.')
            number_narrators = 0
        success2, narrative_text_results = query_database(
            'select', query_narrative_text, store_name)
        if success2 and 'results' in narrative_text_results and \
                'bindings' in narrative_text_results['results']:
            # All narrative texts are analyzed together; Each is preceded by a space
            narratives = EMPTY_STRING.join(
                f" {binding['narr_text']['value']}"
                for binding in narrative_text_results['results']['bindings'])
        else:
            show_error(f'No narrators are defined in {store_name}. '
                       f'Summary graphs, charts and word frequencies cannot be generated.')
            narratives = EMPTY_STRING
    except Exception as e:
        capture_error(
            f'Exception getting initial narrative details from {store_name}: {str(e)}',
            True)
        return

    # Details of the three narrator histograms, keyed by the button that shows them:
    # (logging label, value type, query, axis label, chart title, text if no narrators)
    histogram_details = {
        'Gender Details': (
            'gender', 'gender', query_genders, 'Number of Narrators/Subjects',
            'Narrator Genders', 'The gender histogram cannot be displayed.'),
        'Birth Year Details': (
            'birth year', 'year', query_years,
            'Number of Narrators/Subjects Born in Year',
            'Narrator Birth Years', 'The birth histograms cannot be displayed.'),
        'Birth Country Details': (
            'birth country', 'country', query_countries,
            'Number of Narrators/Subjects Born in Country',
            'Narrator Birth Countries', 'The birth histograms cannot be displayed.'),
    }

    # Create the GUI Window
    window_stats_list = sg.Window('Display Summary Statistics', layout,
                                  icon=encoded_logo).Finalize()
    # The default directory is resources_root without its last character
    window_stats_list.FindElement('directory_name').Update(resources_root[:-1])
    window_stats_list.FindElement('words_in_cloud').Update(50)
    window_stats_list.FindElement('nouns_in_csv').Update(50)
    window_stats_list.FindElement('verbs_in_csv').Update(50)

    # Event Loop to process window "events"; The finally clause closes the
    # window even if one of the display functions raises
    try:
        while True:
            event_stats_list, values = window_stats_list.read()
            if event_stats_list in (sg.WIN_CLOSED, 'End'):
                # If user closes window or clicks 'End'
                break
            elif event_stats_list in histogram_details:
                log_label, value_type, query, axis_label, chart_title, no_data_text = \
                    histogram_details[event_stats_list]
                if not number_narrators:
                    show_error(f'No narrators are defined in {store_name}. {no_data_text}')
                    continue
                logging.info(f'Displaying {log_label} statistics for {store_name}')
                y_values, x_values = get_y_x_values(number_narrators, value_type, query,
                                                    store_name)
                _display_horiz_histogram(y_values, x_values, axis_label, chart_title)
            elif event_stats_list == 'Locations Mentioned':
                if not narratives:
                    show_error(f'No narrators are defined in {store_name}. '
                               f'A list of locations cannot be extracted.')
                    continue
                _display_locations(narratives)
            elif event_stats_list == 'Years and Events Mentioned':
                if not narratives:
                    show_error(f'No narrators are defined in {store_name}. '
                               f'A list of years and events cannot be extracted.')
                    continue
                _display_years_events(narratives)
            elif event_stats_list == 'Word Cloud':
                if not values['words_in_cloud']:
                    show_error('A word count MUST be specified to configure the word cloud '
                               'output. Please provide a value.')
                    continue
                if not narratives:
                    show_error(f'No narrators are defined in {store_name}. '
                               f'The word cloud cannot be displayed.')
                    continue
                word_count = parse_count(values['words_in_cloud'], 'number of words')
                if word_count is None:
                    continue
                _display_word_cloud(narratives, word_count)
            elif event_stats_list == 'Output "Unknown" Nouns/Verbs':
                # The event name is the button text defined in the layout above
                # All three values are needed, so any missing one is an error
                if not values['directory_name'] or not values['nouns_in_csv'] \
                        or not values['verbs_in_csv']:
                    show_error('A directory name and noun/verb word counts MUST be specified '
                               'to save the unknown words and their frequency counts. '
                               'Please provide all of these values.')
                    continue
                if not narratives:
                    show_error(f'No narratives were found in {store_name}. '
                               f'The word frequencies cannot be output.')
                    continue
                noun_count = parse_count(values['nouns_in_csv'], 'number of nouns')
                if noun_count is None:
                    continue
                verb_count = parse_count(values['verbs_in_csv'], 'number of verbs')
                if verb_count is None:
                    continue
                logging.info(f'Outputting nouns/verbs for {store_name}')
                _output_words_in_csv(narratives, noun_count, verb_count,
                                     values['directory_name'])
    finally:
        # Done
        window_stats_list.close()
    return
def test_hypothesis(store_name: str):
    """
    Display a window to show currently defined hypotheses, and allow selection and test
    of one of them.

    Testing is not yet implemented: the numbers of hypotheses and narratives in
    the store are retrieved (with a popup if either is missing) and a
    placeholder window is shown.

    :param store_name: The database/data store name holding the hypotheses and narratives
    :return: TBD (currently None)
    """
    logging.info(f'Test hypothesis in {store_name}')
    # Setup the PySimpleGUI window
    sg.theme('Material2')
    layout = [
        [sg.Text("Not yet implemented.", font=('Arial', 16))],
        [sg.Text("To exit, press 'End' or close the window.", font=('Arial', 16))],
        [sg.Text()],
        [sg.Button('End', button_color='dark blue', size=(5, 1), font=('Arial', 14))],
    ]

    # Get the data for the window
    try:
        number_hypotheses = 0
        number_narratives = 0
        success1, hypotheses_results = query_database('select', query_hypotheses, store_name)
        if success1 and 'results' in hypotheses_results and \
                'bindings' in hypotheses_results['results']:
            number_hypotheses = len(hypotheses_results['results']['bindings'])
        success2, narratives_results = query_database('select', query_number_narratives,
                                                      store_name)
        if success2 and 'results' in narratives_results and \
                'bindings' in narratives_results['results']:
            number_narratives = int(
                narratives_results['results']['bindings'][0]['cnt']['value'])

        # Name everything that is missing in a single message, e.g.
        # 'No hypotheses and no narratives are defined in <store>.'
        missing = []
        if not number_hypotheses:
            missing.append('hypotheses')
        if not number_narratives:
            missing.append('narratives')
        if missing:
            error_msg = f'No {" and no ".join(missing)} are defined in {store_name}.'
            sg.popup_error(error_msg, font=('Arial', 14), button_color='dark blue',
                           icon=encoded_logo)
    except Exception as e:
        capture_error(
            f'Exception getting hypotheses details from {store_name}: {str(e)}',
            True)
        return

    window_test_list = sg.Window('Test Hypothesis', layout, icon=encoded_logo).Finalize()

    # Event Loop to process window "events"
    while True:
        event_test_list, values = window_test_list.read()
        if event_test_list in (sg.WIN_CLOSED, 'End'):
            # If user closes window or clicks 'End'
            break
        # TODO

    # Done
    window_test_list.close()
    return
def process_csv(csv_file: str, store_name: str, store_list: list) -> int:
    """
    Input the specified CSV file and process the narratives defined in it. The format of
    the CSV MUST be:
    Source,Title,Person,Given,Given2,Surname,Maiden,Maiden2,Gender,Start,End,Remove,Header,Footer

    Records without Source, Title, Person or Gender values (and PDF records
    without both Start and End pages) are skipped with a popup. Any exception
    stops the processing and is reported through capture_error.

    :param csv_file: CSV file name
    :param store_name: Database/data store name
    :param store_list: List of the existing dbs - Needed to determine if a db name is
                       new (in which case the db is created) or existing
    :return: Count of the number of narratives ingested
    """
    logging.info(f'Processing the CSV, {csv_file}')
    count = 0
    if store_name not in store_list:
        db_exception = create_delete_database('create', store_name)
        if db_exception:
            capture_error(f'Error creating or deleting {store_name}: {db_exception}', True)
            return 0
    try:
        with open(csv_file, newline=EMPTY_STRING) as meta_file:
            narr_dict = csv.DictReader(meta_file)
            # Process each narrative based on the metadata:
            # Source,Title,Person,Given,Given2,Surname,Maiden,Maiden2,Gender,Start,End,Header,Footer
            for narr_meta in narr_dict:
                if 'Title' not in narr_meta or 'Given' not in narr_meta:
                    capture_error(
                        'Expected columns not found in the CSV file. Processing stopped.',
                        False)
                    # Actually stop, as the message states
                    return count
                title = narr_meta['Title']
                logging.info(f'Ingesting the document, {title}')
                source = narr_meta['Source']
                # Must have at least the Source, Title, Person and Gender values defined
                if not source or not title or not narr_meta['Person'] \
                        or not narr_meta['Gender']:
                    sg.popup_error(
                        f'For any source, the Source, Title, Person and Gender details MUST be '
                        f'provided. This is not true for the CSV record with source file, '
                        f'{source}, and narrative title, {title}. That record is skipped.',
                        font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
                    continue
                is_pdf = source.endswith('.pdf')
                if is_pdf:
                    # Capture each narrative text from the metadata details in the CSV
                    # Both page numbers are needed by pdftotext
                    if not narr_meta['Start'] or not narr_meta['End']:
                        sg.popup_error(
                            f'For PDF source files, the Start and End page details MUST be '
                            f'provided. This is not true for the CSV record with source file, '
                            f'{source}, and narrative title, {title}. That record is skipped.',
                            font=('Arial', 14), button_color='dark blue', icon=encoded_logo)
                        continue
                    # pdftotext writes the requested pages to a text file named for the title
                    in_file = f'{resources_root}{title}'
                    subprocess.run(['../tools/pdftotext', '-f', narr_meta['Start'],
                                    '-l', narr_meta['End'], '-simple',
                                    f'{resources_root}{source}', in_file])
                else:
                    in_file = f'{resources_root}{source}'
                try:
                    with open(in_file, 'r', encoding='utf8', errors='ignore') as narr_in:
                        text = clean_text(narr_in.read(), narr_meta)
                    narrative = simplify_text(text, narr_meta)
                    # Double quotes are replaced by single quotes before the text is stored
                    add_narr_data_to_store(narrative.replace('"', "'"), narr_meta, store_name)
                    # create_narrative_graph(narrative, title, store_name)
                finally:
                    # Cleanup - Delete the text file created by pdftotext, even
                    # if the processing of the narrative failed
                    if is_pdf and os.path.exists(in_file):
                        os.remove(in_file)
                count += 1
        # Determine if any narrators/subjects (different names) are really the same
        logging.info('Checking if any unification can be performed')
        unified_triples = unify_narrators(store_name)
        if unified_triples:
            # Add the triples to the data store
            add_remove_data('add', ' '.join(unified_triples), store_name)
    except Exception as e:
        capture_error(f'Exception ingesting narratives: {str(e)}', True)
    return count