import Core_Scripts.Custom_Functions.Functions_GUI as GUI import Core_Scripts.Custom_Functions.Functions_Configs as configFunc import os import tkinter as tk import mysql.connector from datetime import datetime import traceback # get current path where script is ran cwd = os.getcwd() target_table = 'projects' # incorporate a check to see if the config file already has data provided configName = 'SQL_Config.csv' fields = 'Host', 'User', 'Password', 'Database_Name' config, config_exist = configFunc.check_for_config(configName, field_list=fields) # database credentials master_window = tk.Tk() master_window.title('Config Setup: Database information') entry_form = GUI.makeform(master_window, fields, config_exist, config) # binds code to run if the user presses the enter key when in the form # master_window.bind('<Return>', (lambda event, e=entry_form: get_input_data(e))) # Update button and quit button do the same commands ok_button = tk.Button(master_window, text='Update', command=master_window.quit) ok_button.pack(side=tk.LEFT, padx=5, pady=5) quit_button = tk.Button(master_window, text='Quit', command=master_window.destroy) quit_button.pack(side=tk.LEFT, padx=5, pady=5)
def run_analysis_excel(): start = time.time() configName = 'Local_Config.csv' # get the config file and database name + directory config, found = configFunc.check_for_config(configName) db_name = config['DB_Filename'] db_path = config['DB_Filepath'] maindirectory = config['Folderpath'] project_name = config['Foldername'] remove_identification = config['Simplify_Names'] == '1' os.chdir(db_path) # get team data for classification (only needed for a few functions) teamsheet = 'TeamList' db_team_data = pd.read_excel(db_name, sheet_name=teamsheet) eng_team_list = db_team_data[db_team_data['Type'] == 'ENG'].Name.tolist() bim_team_list = db_team_data[db_team_data['Type'] == 'BIM'].Name.tolist() # extraction of data, timing for each main process for general time to execute start_extracting = time.time() raw_markup_data, raw_pdf_data = script_extract_pdf_data(maindirectory) raw_markup_data['Project'] = project_name raw_pdf_data['Project'] = project_name # format and cleanup markup data, keep raw markup data as-is start_formatting_markup = time.time() formatted_markup_data = script_format_markup_data(raw_markup_data, eng_team_list, bim_team_list) # format raw pdf data and calculate summaries of markups for each pdf start_analyzing = time.time() formatted_pdf_data = script_analyze_markups(formatted_markup_data, raw_pdf_data, eng_team_list, bim_team_list) # calculate completion times where applicable (check markups and note most recent ones from each team) start_data_general = time.time() analyzed_pdf_data = script_process_data_general(formatted_markup_data, formatted_pdf_data) # calculate graph and time series showing rates of completion and backlog amount on a given day start_data_history = time.time() visualization_data = script_data_history_summary(analyzed_pdf_data) visualization_data['Project'] = project_name # saving to database, sheet names, start timer for saving print('Saving data.') start_saving = time.time() # if we want to remove identification, replace names in the data sets accordingly if remove_identification: print('Removing Identifying Names') # to make functions easier to work with, capitalize the team data authors = [s.upper() for s in db_team_data['Name'].tolist()] author_type = [s.upper() for s in db_team_data['Type'].tolist()] # Create a dictionary of author names to types team_mapping = dict(zip(authors, author_type)) author_col = '/T' # convert to initials for raw markup data, fix where there is no author author_initials = customFunc.simplify_author_names( raw_markup_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] raw_markup_data[author_col] = author_initials # convert to initials for formatted markup data, fix where there is no author author_initials = customFunc.simplify_author_names( formatted_markup_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] formatted_markup_data[author_col] = author_initials # convert to initials for final pdf data, fix where there is no author author_col = 'LastMarkupEditFrom' author_initials = customFunc.simplify_author_names( analyzed_pdf_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] analyzed_pdf_data[author_col] = author_initials print('Author Identification removed') # saving variables for easier adjustments rawmarkupsheet = 'MarkupRawData' formatted_markup_sheet = 'MarkupData' pdf_data_sheet = 'PDF_Data' visualize_sheet = 'VisualizationData' time_sheet = 'Runtime History' # reset the path to make sure it saves to the right location os.chdir(db_path) generalFunc.append_df_to_excel(db_name, raw_markup_data, rawmarkupsheet, startrow=0, truncate_sheet=True) generalFunc.append_df_to_excel(db_name, formatted_markup_data, formatted_markup_sheet, startrow=0, truncate_sheet=True) generalFunc.append_df_to_excel(db_name, analyzed_pdf_data, pdf_data_sheet, startrow=0, truncate_sheet=True) generalFunc.append_df_to_excel(db_name, visualization_data, visualize_sheet, startrow=0, truncate_sheet=True) # end timer and calculate the execution times of portions of the codes, then save end = time.time() runtime = round(end - start, 3) total_file_count = len(analyzed_pdf_data.iloc[:, 0]) time_df = pd.DataFrame( [datetime.now(), total_file_count, runtime, project_name]) generalFunc.append_df_to_excel(db_name, time_df.transpose(), time_sheet, header=None) print('Data saved. All processing complete. Total processed time:', str(runtime), 'seconds for', str(total_file_count), 'files.') print('Time Summaries (in seconds):') print('Time to save: ', str(round(end - start_saving, 3))) print('Time to extract: ', str(round(start_formatting_markup - start_extracting, 3))) print('Time to format: ', str(round(start_analyzing - start_formatting_markup, 3))) print('Time to analyze: ', str(round(start_data_general - start_analyzing, 3))) print('Time to process: ', str(round(start_data_history - start_data_general, 3))) print('Time to calculate history: ', str(round(start_saving - start_data_history, 3)))
# for easier adjustment, set variables for certain dictionary keys fold_path, db_path, db_name = 'Folderpath', 'DB_Filepath', 'DB_Filename' fields = fold_path, db_path # start tkinter and give the window a title root = tk.Tk() root.title("Config Setup") # setup the variables to store the output into using the dialogue filename = tk.StringVar() dirname = tk.StringVar() simple_identity = tk.IntVar() # assume config wont exist, check to see if it is found configName = 'Local_Config.csv' config, found = configFunc.check_for_config(configName) # try to set defaults for the dialogue window. Moved to outside the loop as having a default initiation for each # variable starts to get messy and conditions for config file not being found would be more complicated try: default_foldpath = config[fold_path] dirname.set(default_foldpath) except: default_foldpath = "No folder selected" try: default_dbpath = config[db_path] + config[db_name] filename.set(default_dbpath) except: default_dbpath = "No file selected"
def run_analysis_mysql(config_name='SQL_Config.csv'): start = time.time() # get the config file and database name + directory config, found = configFunc.check_for_config(config_name) # these values are setup from the config file # remove identification is set to be a string 0 or a 1, convert to boolean for ease of use in branches project_name = config['Project'] remove_identification = config['Simplify_Names'] == '1' # get SQL connection working connection = mysql.connector.connect(host=config['Host'], user=config['User'], passwd=config['Password'], database=config['Database_Name']) # this has to come from the DB project_table = 'projects' mycursor = connection.cursor() # SQL syntax for project column name has to be manually updated if it is renamed mycursor.execute("SELECT directory FROM " + project_table + " WHERE Project = '" + project_name + "'") directory_db = mycursor.fetchone() # convert from tuple to a regular string for python to use project_directory = directory_db[0] # SQL syntax for project column name has to be manually updated if it is renamed mycursor.execute("SELECT uses_studio FROM " + project_table + " WHERE Project = '" + project_name + "'") studio_property = mycursor.fetchone() # convert from tuple to a regular string for python to use project_uses_studio = studio_property[0] == 1 print('Analyzing project: ' + project_name) # get team data for classification (only needed for a few functions). team data cant get column names changed team_table = 'team_list' mycursor = connection.cursor() mycursor.execute("SELECT name, type FROM " + team_table) # syntax to get data from the database, output is a list of lists per entry, then converted into dataframe # to filter results team_df = pd.DataFrame(mycursor.fetchall(), columns=['Name', 'Type']) eng_team_list = team_df[team_df['Type'] == 'ENG'].Name.tolist() bim_team_list = team_df[team_df['Type'] == 'BIM'].Name.tolist() # extraction of data, timing for each main process for general time to execute start_extracting = time.time() raw_markup_data, raw_pdf_data = script_extract_pdf_data(project_directory) raw_markup_data['Project'] = project_name raw_pdf_data['Project'] = project_name # format and cleanup markup data, keep raw markup data as-is start_formatting_markup = time.time() formatted_markup_data = script_format_markup_data(raw_markup_data, eng_team_list, bim_team_list) # format raw pdf data and calculate summaries of markups for each pdf start_analyzing = time.time() formatted_pdf_data = script_analyze_markups(formatted_markup_data, raw_pdf_data, eng_team_list, bim_team_list) # calculate completion times where applicable (check markups and note most recent ones from each team) start_data_general = time.time() analyzed_pdf_data = script_process_data_general(formatted_markup_data, formatted_pdf_data) # calculate graph and time series showing rates of completion and backlog amount on a given day start_data_history = time.time() visualization_data = script_data_history_summary(analyzed_pdf_data) visualization_data['Project'] = project_name # saving to database, sheet names print('Saving data.') start_saving = time.time() # if we want to remove identification, replace names in the data sets accordingly if remove_identification: print('Removing Identifying Names') # to make functions easier to work with, capitalize the team data authors = [s.upper() for s in team_df['Name'].tolist()] author_type = [s.upper() for s in team_df['Type'].tolist()] # Create a dictionary of author names to types team_mapping = dict(zip(authors, author_type)) author_col = '/T' # convert to initials for raw markup data, fix condition where there is no author author_initials = customFunc.simplify_author_names( raw_markup_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] raw_markup_data[author_col] = author_initials # convert to initials for formatted markup data, fix condition where there is no author author_initials = customFunc.simplify_author_names( formatted_markup_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] formatted_markup_data[author_col] = author_initials # convert to initials for final pdf data, fix condition where there is no author author_col = 'LastMarkupEditFrom' author_initials = customFunc.simplify_author_names( analyzed_pdf_data[author_col].tolist(), team_mapping) author_initials = ['None' if s == 'N' else s for s in author_initials] analyzed_pdf_data[author_col] = author_initials print('Author Identification removed') # SQL saving variables, pdf table col names should be mapped according to the order the dataframe is in pdf_table = 'pdf_data' print('Saving: saving pdf data to ' + pdf_table) pdf_table_col_names = [ 'CreationDate', 'ModDate', 'Filename', 'Project', 'DwgNo', 'MarkupCount', 'Last_Editor', 'StampedBy', 'Eng_Stamp_Type', 'Latest', 'Engineer_Revised', 'BIM_Complete', 'Complete_Time' ] delete_syntax = "DELETE FROM " + pdf_table + " WHERE Project = '" + project_name + "'" save.save_to_SQL(connection, pdf_table, pdf_table_col_names, analyzed_pdf_data, delete_syntax_condition=delete_syntax) # prepare the raw markups for saving, this can error if there are no raw markups when savings, so need to catch # SQL saving variables, pdf table col names should be mapped according to the order the dataframe is in raw_markup_table = 'raw_markups' print('Saving: saving raw markup data to ' + raw_markup_table) raw_markup_table_col_names = [ 'Subtype', 'CreationDateRaw', 'Subject', 'Contents', 'Author', 'Filename', 'Project' ] delete_syntax = "DELETE FROM " + raw_markup_table + " WHERE Project = '" + project_name + "'" # try to save, and if failed, attempt to log error errors = 0 saving_log = open('Logs/SQL_save_errors.txt', "a") try: save.save_to_SQL(connection, raw_markup_table, raw_markup_table_col_names, raw_markup_data, delete_syntax_condition=delete_syntax) except Exception: errors += 1 # we want to see a timestamp and the project name that this failed on errmsg = "\n" + str(datetime.now( )) + " EXCEPTION BELOW DURING SAVING RAW MARKUPS FOR PROJECT {" errmsg += project_name + "}:\n" errmsg += str(traceback.format_exc()) saving_log.write(errmsg) print(errmsg) # markups need a drop included until functionality is adjusted to exclude certain columns from being populated # functionality can be included in SQL if needed, but may need to get added at a later point markup_table = 'markup_data' print('Saving: saving markup data to ' + markup_table) markup_table_col_names = [ 'Subtype', 'CreationDate', 'Subject', 'Author', 'Filename', 'Project', 'DwgNo', 'AuthorType' ] delete_syntax = "DELETE FROM " + markup_table + " WHERE Project = '" + project_name + "'" sql_markup_df = formatted_markup_data.drop( columns=['Time_YMD', 'Time_HMS', '/Contents']) # try to save, and if failed, attempt to log error try: if len(sql_markup_df) < 1: print('No markup data to save.') else: save.save_to_SQL(connection, markup_table, markup_table_col_names, sql_markup_df, delete_syntax_condition=delete_syntax) except Exception: errors += 1 # we want to see a timestamp and the project name that this failed on errmsg = "\n" + str(datetime.now( )) + " EXCEPTION BELOW DURING SAVING FORMATTED MARKUPS FOR PROJECT {" errmsg += project_name + "}:\n" errmsg += str(traceback.format_exc()) saving_log.write(errmsg) print(errmsg) # close the log file as it shouldnt be used anymore past this point saving_log.close() visualization_table = 'visualization_data' print('Saving: saving visualization data to ' + visualization_table) visualization_table_col_names = [ 'Date', 'Engineer_Revised', 'BIM_Completed', 'Total_Outstanding', 'Project' ] delete_syntax = "DELETE FROM " + visualization_table + " WHERE Project = '" + project_name + "'" save.save_to_SQL(connection, visualization_table, visualization_table_col_names, visualization_data, delete_syntax_condition=delete_syntax) # end timer and figure out the time to execute the scripts end = time.time() runtime = round(end - start, 3) total_file_count = len(analyzed_pdf_data.iloc[:, 0]) time_list = [datetime.now(), total_file_count, runtime, project_name] time_table = 'runtime_history' time_table_columns = ['Date', 'File_Count', 'Runtime', 'Project'] print('Saving: saving data to ' + time_table) save.save_to_SQL(connection, time_table, time_table_columns, time_list) print('Data saved. All processing complete. Total processed time:', str(runtime), 'seconds for', str(total_file_count), 'files.') print('Time Summaries (in seconds):') print('Time to save: ', str(round(end - start_saving, 3))) print('Time to extract: ', str(round(start_formatting_markup - start_extracting, 3))) print('Time to format: ', str(round(start_analyzing - start_formatting_markup, 3))) print('Time to analyze: ', str(round(start_data_general - start_analyzing, 3))) print('Time to process: ', str(round(start_data_history - start_data_general, 3))) print('Time to calculate history: ', str(round(start_saving - start_data_history, 3))) if errors > 0: print('---------------------------------------------', '\nERRORS FOUND DURING RUNTIME, REVIEW LOG FILES\n', '---------------------------------------------')
import csv # function imports import Core_Scripts.Custom_Functions.Functions_Configs as configFunc # extra imports from shutil import copyfile # main imports from Run_Setup_and_Analysis_SQL_DB import run_analysis_mysql original_config = 'SQL_Config.csv' new_config_location = 'SQL_Config_multiple.csv' project_list_file = 'Project_list.csv' copyfile(original_config, new_config_location) config = configFunc.check_for_config(new_config_location)[0] # open the csv, using the csv reader, and then convert into a list (for single column csv) project_list = list(csv.reader(open(project_list_file))) # logging = open('script_results.txt', 'w') # sys.stdout = logging logfile = open('Logs/errors.txt', "a") errors = 0 for eachproject in project_list: # have to convert the list into its main string object print('Run on project:', eachproject[0]) # adjust the active project config['Project'] = eachproject[0] # overwrite the config to make function use the altered project without user input configFunc.save_config(new_config_location, config) try: