Ejemplo n.º 1
0
    def apply_to_sheet(self, worksheet: gspread.Worksheet):
        """
        Add a row to the specified Google Sheet worksheet corresponding to
        this update's new Slack message.

        The row contains, in order: the message id, the display name
        returned by ``slack_utils.get_user_display`` for ``self.user_id``,
        the message text, and ``self.timestamp.isoformat()``.

        The row is always inserted at index 2, so each new message ends up
        directly below row 1 (the header row) and pushes older rows down.
        """
        user_name = slack_utils.get_user_display(self.user_id)
        row_data = [
            self.message_id,
            user_name,
            self.message,
            self.timestamp.isoformat(),
        ]

        # Index 2 == first row after the header row (row 1)
        worksheet.insert_row(row_data, 2)

        # Log the message *id* (not the message text) next to the "id=" label
        logger.info(
            f"Message with id={self.message_id} by {user_name} added")
Ejemplo n.º 2
0
def _ensure_sheet_formatting(worksheet: gspread.Worksheet):
    """
    Ensure that the specified worksheet has the correct header row.

    The expected headers are the keys of ``ColumnHeaders.__members__``.
    If row 1 of the worksheet already equals that list, the worksheet is
    left untouched. Otherwise row 1 is overwritten with the expected
    headers (only the header row is replaced; data rows are not moved).
    """
    worksheet_headers = list(worksheet.row_values(1))
    expected_headers = list(ColumnHeaders.__members__.keys())

    # Nothing to do when the header row already lines up with the expected
    # structure; in particular, the valid header row must NOT be deleted.
    if worksheet_headers == expected_headers:
        return

    logger.warning("Prexisting table, with improper formatting: Fixing")
    # TODO: move all data, not just headers
    # Insert the new header first and only then drop the stale one (which
    # has been pushed down to row 2). Doing it in this order means the
    # sheet never has to pass through a zero-row state, so this also works
    # for a worksheet that has a single row (Google Sheets is expected to
    # reject deleting the only row of a sheet).
    worksheet.insert_row(expected_headers, 1)
    worksheet.delete_row(2)
Ejemplo n.º 3
0
def process_datasets(config: SettingConfig, sheet: Worksheet) -> None:
    """
    Run RASA training/testing for every split of every scenario folder and
    write the resulting F1 scores to the given worksheet.

    The configuration identifier is written to row 1 of ``sheet``. Then,
    for each sub-directory of ``config.datasets_path``:

    * ``scenario_reports/intent_reports`` and
      ``scenario_reports/slot_reports`` are created inside it;
    * for each split id in ``range(config.splits)`` the script at
      ``config.rasa_script_path`` is invoked with the train split, the test
      split and ``config.rasa_config_path``, after which the intent and
      slot results are processed and the confusion matrix is copied;
    * two rows are inserted into ``sheet`` (intent row above slot row),
      each of the form ``[label, f1_split_0, ..., f1_split_n, mean, stdev]``.

    Per-split scores and the mean are rounded to 4 decimals, the standard
    deviation to 3. Both statistics are computed over all per-split scores
    of the scenario.

    Notes:
        * Processing stops at the first sub-directory that holds two or
          fewer entries (it ``break``s; later folders are not processed).
        * ``os.mkdir`` is used, so the call fails if the report directories
          already exist.
        * ``mean``/``stdev`` are applied to the per-split scores, so
          ``config.splits`` presumably has to be at least 2 -- TODO confirm
          against the implementation of ``stdev`` that is imported.
    """
    print("Started running RASA")

    # Write the title of the spreadsheet based on the loaded configuration
    sheet.insert_row([config.identifier], 1)

    spreadsheet_row_index = SPREADSHEET_START_VERTICAL_OFFSET
    for scenario in os.listdir(config.datasets_path):
        folder_path = os.path.join(config.datasets_path, scenario)
        if not os.path.isdir(folder_path):
            continue

        # Stop entirely (not just skip this folder) if the directory does
        # not contain the splits
        if len(os.listdir(folder_path)) <= 2:
            print(
                "Directory only contains train and test files, but no splits. Breaking."
            )
            break

        # Split files are named "<folder>/<scenario>_{train,test}_<id>.json"
        file_path = os.path.join(folder_path, scenario)

        # Create the reports directory and its intent/slot sub-directories
        scenario_reports_path = os.path.join(folder_path, 'scenario_reports')
        intent_reports_path = os.path.join(scenario_reports_path,
                                           'intent_reports')
        slot_reports_path = os.path.join(scenario_reports_path,
                                         'slot_reports')
        os.mkdir(scenario_reports_path)
        os.mkdir(intent_reports_path)
        os.mkdir(slot_reports_path)

        # Per-split F1 scores are kept apart from the row label and from the
        # summary statistics, so that mean/stdev always see exactly the
        # per-split values and nothing else.
        slot_scores = []
        intent_scores = []

        for split_id in range(config.splits):
            identifier = f" {scenario}, split {split_id}"
            train_split = f"{file_path}_train_{split_id}.json"
            test_split = f"{file_path}_test_{split_id}.json"

            # Run the subprocess for RASA training and testing, and wait for
            # its completion.
            # NOTE(review): with shell=True and a list, POSIX runs only the
            # first item as the command and hands the remaining items to the
            # shell itself rather than to the script -- confirm this is only
            # ever run on Windows, or drop shell=True.
            command = [
                config.rasa_script_path, train_split, test_split,
                config.rasa_config_path
            ]
            subprocess.Popen(command, shell=True).wait()

            # Process the slot and intent errors & reports and keep their F1
            intent_f1 = process_intent_result(identifier,
                                              scenario_reports_path, config)
            slot_f1 = process_slot_result(identifier, scenario_reports_path,
                                          config)

            # Move the confusion matrix to the results path
            copy_confusion_matrix(identifier, config)

            slot_scores.append(float(f"{slot_f1:0.4f}"))
            intent_scores.append(float(f"{intent_f1:0.4f}"))

            print(f"Finished processing split {identifier}")

        # Summary statistics over ALL splits of the scenario (the standard
        # deviation must include the last split as well).
        intent_mean = float(f"{mean(intent_scores):0.4f}")
        slot_mean = float(f"{mean(slot_scores):0.4f}")
        intent_stdev = float(f"{stdev(intent_scores):0.3f}")
        slot_stdev = float(f"{stdev(slot_scores):0.3f}")

        scenario_slot_results = [
            f'Slot - {scenario}', *slot_scores, slot_mean, slot_stdev
        ]
        scenario_intent_results = [
            f'Intent - {scenario}', *intent_scores, intent_mean, intent_stdev
        ]

        # Both rows are inserted at the same index, so the intent row ends
        # up directly above the slot row; advancing by 3 leaves one spacer
        # row between scenarios.
        sheet.insert_row(scenario_slot_results, spreadsheet_row_index)
        sheet.insert_row(scenario_intent_results, spreadsheet_row_index)
        spreadsheet_row_index += 3