def apply_to_sheet(self, worksheet: gspread.Worksheet):
    """
    Add a row to the specified Google Sheet worksheet corresponding to this
    update's new Slack message.

    The row contains, in order: the message id, the author's display name
    (resolved from ``self.user_id`` via ``slack_utils.get_user_display``),
    the message text, and ``self.timestamp.isoformat()``.

    :param worksheet: worksheet that receives the new row.
    """
    user_name = slack_utils.get_user_display(self.user_id)
    row_data = [
        self.message_id,
        user_name,
        self.message,
        self.timestamp.isoformat(),
    ]

    # Inserts row into the spreadsheet with an offset of 2
    # (After row 1 (header row))
    worksheet.insert_row(row_data, 2)

    # Log the message *id*; the previous version interpolated the message
    # body here, which mislabelled the log entry.
    logger.info(f"Message with id={self.message_id} by {user_name} added")
def _ensure_sheet_formatting(worksheet: gspread.Worksheet):
    """
    Make sure row 1 of the given worksheet holds the expected header row.

    The expected headers are the member names of ``ColumnHeaders``. When the
    first row differs from them, it is deleted and the expected headers are
    inserted in its place (only the header row is touched, not the data).

    :param worksheet: worksheet whose first row is checked and, if needed,
        rewritten.
    """
    current_headers = list(worksheet.row_values(1))
    wanted_headers = list(ColumnHeaders.__members__)

    if current_headers == wanted_headers:
        # Headers already match; only sheets with at most one row get the
        # extra insert/delete shuffle below.
        # TODO: ensure the below is still necessary
        if worksheet.row_count not in (0, 1):
            return
        # NOTE(review): traced by hand, this sequence appears to end with an
        # empty row 1 rather than the headers -- confirm against gspread's
        # insert_row/delete_row semantics before relying on it.
        worksheet.insert_row([], 1)
        worksheet.insert_row(wanted_headers, 1)
        worksheet.delete_row(3)
        worksheet.delete_row(1)
        return

    # Header mismatch: replace the first row with the expected headers.
    logger.warning("Prexisting table, with improper formatting: Fixing")
    # TODO: move all data, not just headers
    worksheet.delete_row(1)
    worksheet.insert_row(wanted_headers, 1)
def process_datasets(config: SettingConfig, sheet: Worksheet) -> None:
    """
    Run RASA training/testing for every scenario folder and log the F1 scores.

    For each sub-directory of ``config.datasets_path`` the function creates
    the report directories, runs ``config.rasa_script_path`` once per split
    (``config.splits`` splits), collects the intent and slot F1 scores of
    each split, and inserts two rows into ``sheet``: one for the slot scores
    and one for the intent scores. Each row is laid out as
    ``[label, score_0, ..., score_n-1, mean, stdev]``.

    Processing stops at the first scenario folder that holds two entries or
    fewer, since such a folder has no split files.

    :param config: loaded settings; the attributes read here are
        ``identifier``, ``datasets_path``, ``splits``, ``rasa_script_path``
        and ``rasa_config_path``.
    :param sheet: worksheet that receives the title row and result rows.
    """
    print("Started running RASA")

    # Compute and write the title of the spreadsheet based on the loaded configurations
    sheet.insert_row([config.identifier], 1)

    # For each scenario folder
    spreadsheet_row_index = SPREADSHEET_START_VERTICAL_OFFSET
    for file in os.listdir(config.datasets_path):
        # Compute the path for the scenario folder
        folder_path = os.path.join(config.datasets_path, file)
        if not os.path.isdir(folder_path):
            continue

        # Break if the directory does not contain the splits
        if len(os.listdir(folder_path)) <= 2:
            print(
                "Directory only contains train and test files, but no splits. Breaking."
            )
            break

        # Split files are named "<folder>/<folder>_{train,test}_<n>.json"
        file_path = os.path.join(folder_path, file)

        # Compute the report paths and create the directories. exist_ok keeps
        # a re-run over the same dataset folder from failing with
        # FileExistsError.
        scenario_reports_path = os.path.join(folder_path, 'scenario_reports')
        intent_reports_path = os.path.join(scenario_reports_path, 'intent_reports')
        slot_reports_path = os.path.join(scenario_reports_path, 'slot_reports')
        for reports_dir in (scenario_reports_path, intent_reports_path,
                            slot_reports_path):
            os.makedirs(reports_dir, exist_ok=True)

        slot_scores = []
        intent_scores = []
        for split_id in range(config.splits):
            # Compute the identifier, get the train split and test split
            identifier = f" {file}, split {split_id}"
            train_split = f"{file_path}_train_{split_id}.json"
            test_split = f"{file_path}_test_{split_id}.json"

            # Run the subprocess for RASA training and testing, and wait for its completion
            command = [
                config.rasa_script_path, train_split, test_split,
                config.rasa_config_path
            ]
            # NOTE(review): with shell=True and a list, POSIX hands only the
            # first item to the shell as the command; the remaining items
            # become arguments of the shell itself (see the subprocess docs).
            # Left unchanged because the target platform is not visible here
            # -- confirm the script really receives the split paths.
            subprocess.Popen(command, shell=True).wait()

            # Process the slot and intent errors & reports and save their return values
            intent_f1 = process_intent_result(identifier, scenario_reports_path, config)
            slot_f1 = process_slot_result(identifier, scenario_reports_path, config)

            # Move the confusion matrix to the results path
            copy_confusion_matrix(identifier, config)

            slot_scores.append(_round_score(slot_f1, 4))
            intent_scores.append(_round_score(intent_f1, 4))
            print(f"Finished processing split {identifier}")

        # Mean and standard deviation are computed from the per-split scores
        # *before* anything is appended, so the statistics cover every split
        # and never include the mean itself.
        scenario_slot_results = [
            f'Slot - {file}', *slot_scores, *_summarize_scores(slot_scores)
        ]
        scenario_intent_results = [
            f'Intent - {file}', *intent_scores, *_summarize_scores(intent_scores)
        ]

        # Append the lines in the google doc. Both rows are inserted at the
        # same index, slot first and intent second (presumably leaving the
        # intent row above the slot row); the index then advances by 3.
        sheet.insert_row(scenario_slot_results, spreadsheet_row_index)
        sheet.insert_row(scenario_intent_results, spreadsheet_row_index)
        spreadsheet_row_index += 3


def _round_score(value, digits):
    """Return *value* as a float rounded to *digits* decimals via fixed-point formatting."""
    return float(f"{value:0.{digits}f}")


def _summarize_scores(scores):
    """Return ``[mean, stdev]`` of *scores*, rounded to 4 and 3 decimals respectively."""
    average = _round_score(mean(scores), 4)
    # The sample standard deviation needs at least two data points; a single
    # split is reported as 0.0 instead of raising after training has finished.
    spread = _round_score(stdev(scores), 3) if len(scores) > 1 else 0.0
    return [average, spread]