Beispiel #1
0
    def load_and_run_default_preprocessing_phase_task(self):
        logger.info("Execute task 'load_and_run_default_preprocessing_phase_task'.")

        if self.document_base is not None:
            self.statistics = Statistics(self.collect_statistics)
            self.save_statistics_to_json_action.setEnabled(self.collect_statistics)

            self.disable_global_input()

            # noinspection PyUnresolvedReferences
            self.load_and_run_default_preprocessing_phase.emit(self.document_base, self.statistics)
Beispiel #2
0
    def run_matching_phase_task(self):
        logger.info("Execute task 'run_matching_phase_task'.")

        if self.document_base is not None and self.matching_phase is not None:
            self.statistics = Statistics(self.collect_statistics)
            self.save_statistics_to_json_action.setEnabled(self.collect_statistics)

            self.disable_global_input()
            self.interactive_matching_widget.enable_input()
            self.show_interactive_matching_widget()

            # noinspection PyUnresolvedReferences
            self.run_matching_phase.emit(self.document_base, self.matching_phase, self.statistics)
Beispiel #3
0
from aset.matching.phase import TreeSearchMatchingPhase
from aset.preprocessing.embedding import BERTContextSentenceEmbedder, FastTextLabelEmbedder, RelativePositionEmbedder, \
    SBERTExamplesEmbedder, SBERTTextEmbedder
from aset.preprocessing.extraction import StanzaNERExtractor
from aset.preprocessing.phase import PreprocessingPhase
from aset.resources import ResourceManager
from aset.statistics import Statistics
from aset.status import StatusFunction

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger()

if __name__ == "__main__":
    with ResourceManager() as resource_manager:
        status_function = StatusFunction(callback_fn=None)
        statistics = Statistics(do_collect=True)

        documents = dataset.load_dataset()

        # create the document base
        document_base = ASETDocumentBase(
            documents=[ASETDocument(document["id"], document["text"]) for document in documents],
            attributes=[
                ASETAttribute("date"),
                ASETAttribute("airport code")
            ]
        )

        # preprocessing
        preprocessing_phase = PreprocessingPhase(
            extractors=[
Beispiel #4
0
    def __init__(self) -> None:
        super(MainWindow, self).__init__()
        self.setWindowTitle("ASET")

        self.document_base = None
        self.preprocessing_phase = None
        self.matching_phase = None
        self.statistics = Statistics(True)
        self.collect_statistics = True

        # set up the api_thread and api and connect slots and signals
        self.feedback_mutex = QMutex()
        self.feedback_cond = QWaitCondition()
        self.api = ASETAPI(self.feedback_mutex, self.feedback_cond)
        self.api_thread = QThread()
        self.api.moveToThread(self.api_thread)
        self._connect_slots_and_signals()
        self.api_thread.start()

        # set up the status bar
        self.status_bar = self.statusBar()
        self.status_bar.setFont(STATUS_BAR_FONT)

        self.status_widget = QWidget(self.status_bar)
        self.status_widget_layout = QHBoxLayout(self.status_widget)
        self.status_widget_layout.setContentsMargins(0, 0, 0, 0)
        self.status_widget_message = QLabel()
        self.status_widget_message.setFont(STATUS_BAR_FONT)

        self.status_widget_message.setMinimumWidth(10)
        self.status_widget_layout.addWidget(self.status_widget_message)
        self.status_widget_progress = QProgressBar()
        self.status_widget_progress.setMinimumWidth(10)
        self.status_widget_progress.setMaximumWidth(200)
        self.status_widget_progress.setTextVisible(False)
        self.status_widget_progress.setMaximumHeight(20)
        self.status_widget_layout.addWidget(self.status_widget_progress)
        self.status_bar.addPermanentWidget(self.status_widget)

        # set up the actions
        self._all_actions = []
        self._was_enabled = []

        self.exit_action = QAction("&Exit", self)
        self.exit_action.setIcon(QIcon("aset_ui/resources/leave.svg"))
        self.exit_action.setStatusTip("Exit the application.")
        self.exit_action.triggered.connect(QApplication.instance().quit)
        self._all_actions.append(self.exit_action)

        self.show_document_base_creator_widget_action = QAction("&Create new document base", self)
        self.show_document_base_creator_widget_action.setIcon(QIcon("aset_ui/resources/two_documents.svg"))
        self.show_document_base_creator_widget_action.setStatusTip(
            "Create a new document base from a collection of .txt files and a list of attribute names."
        )
        self.show_document_base_creator_widget_action.triggered.connect(self.show_document_base_creator_widget_task)
        self._all_actions.append(self.show_document_base_creator_widget_action)

        self.add_attribute_action = QAction("&Add attribute", self)
        self.add_attribute_action.setIcon(QIcon("aset_ui/resources/plus.svg"))
        self.add_attribute_action.setStatusTip("Add a new attribute to the document base.")
        self.add_attribute_action.triggered.connect(self.add_attribute_task)
        self.add_attribute_action.setEnabled(False)
        self._all_actions.append(self.add_attribute_action)

        self.remove_attribute_action = QAction("&Remove attribute", self)
        self.remove_attribute_action.setIcon(QIcon("aset_ui/resources/trash.svg"))
        self.remove_attribute_action.setStatusTip("Remove an attribute from the document base.")
        self.remove_attribute_action.triggered.connect(self.remove_attribute_task)
        self.remove_attribute_action.setEnabled(False)
        self._all_actions.append(self.remove_attribute_action)

        self.forget_matches_for_attribute_action = QAction("&Forget matches for attribute", self)
        self.forget_matches_for_attribute_action.setIcon(QIcon("aset_ui/resources/redo.svg"))
        self.forget_matches_for_attribute_action.setStatusTip("Forget the matches for a single attribute.")
        self.forget_matches_for_attribute_action.triggered.connect(self.forget_matches_for_attribute_task)
        self.forget_matches_for_attribute_action.setEnabled(False)
        self._all_actions.append(self.forget_matches_for_attribute_action)

        self.load_document_base_from_bson_action = QAction("&Load document base", self)
        self.load_document_base_from_bson_action.setIcon(QIcon("aset_ui/resources/folder.svg"))
        self.load_document_base_from_bson_action.setStatusTip("Load an existing document base from a .bson file.")
        self.load_document_base_from_bson_action.triggered.connect(self.load_document_base_from_bson_task)
        self._all_actions.append(self.load_document_base_from_bson_action)

        self.save_document_base_to_bson_action = QAction("&Save document base", self)
        self.save_document_base_to_bson_action.setIcon(QIcon("aset_ui/resources/save.svg"))
        self.save_document_base_to_bson_action.setStatusTip("Save the document base in a .bson file.")
        self.save_document_base_to_bson_action.triggered.connect(self.save_document_base_to_bson_task)
        self.save_document_base_to_bson_action.setEnabled(False)
        self._all_actions.append(self.save_document_base_to_bson_action)

        self.save_table_to_csv_action = QAction("&Export table to CSV", self)
        self.save_table_to_csv_action.setIcon(QIcon("aset_ui/resources/table.svg"))
        self.save_table_to_csv_action.setStatusTip("Save the table to a .csv file.")
        self.save_table_to_csv_action.triggered.connect(self.save_table_to_csv_task)
        self.save_table_to_csv_action.setEnabled(False)
        self._all_actions.append(self.save_table_to_csv_action)

        self.forget_matches_action = QAction("&Forget all matches", self)
        self.forget_matches_action.setIcon(QIcon("aset_ui/resources/redo.svg"))
        self.forget_matches_action.setStatusTip("Forget the matches for all attributes.")
        self.forget_matches_action.triggered.connect(self.forget_matches_task)
        self.forget_matches_action.setEnabled(False)
        self._all_actions.append(self.forget_matches_action)

        self.load_and_run_default_preprocessing_phase_action = QAction(
            "&Load and run default preprocessing phase", self
        )
        self.load_and_run_default_preprocessing_phase_action.setStatusTip(
            "Load the default preprocessing phase and run it on the document collection."
        )
        self.load_and_run_default_preprocessing_phase_action.setIcon(QIcon("aset_ui/resources/run_run.svg"))
        self.load_and_run_default_preprocessing_phase_action.setDisabled(True)
        self.load_and_run_default_preprocessing_phase_action.triggered.connect(
            self.load_and_run_default_preprocessing_phase_task
        )
        self._all_actions.append(self.load_and_run_default_preprocessing_phase_action)

        self.load_preprocessing_phase_from_config_action = QAction("&Load preprocessing phase", self)
        self.load_preprocessing_phase_from_config_action.setStatusTip(
            "Load a preprocessing phase from a .json configuration file."
        )
        self.load_preprocessing_phase_from_config_action.triggered.connect(
            self.load_preprocessing_phase_from_config_task
        )
        self._all_actions.append(self.load_preprocessing_phase_from_config_action)

        self.save_preprocessing_phase_to_config_action = QAction("&Save preprocessing phase", self)
        self.save_preprocessing_phase_to_config_action.setStatusTip(
            "Save the preprocessing phase in a .json configuration file."
        )
        self.save_preprocessing_phase_to_config_action.triggered.connect(self.save_preprocessing_phase_to_config_task)
        self.save_preprocessing_phase_to_config_action.setEnabled(False)
        self._all_actions.append(self.save_preprocessing_phase_to_config_action)

        self.run_preprocessing_phase_action = QAction("Run preprocessing phase", self)
        self.run_preprocessing_phase_action.setIcon(QIcon("aset_ui/resources/run.svg"))
        self.run_preprocessing_phase_action.setStatusTip("Run the preprocessing phase on the document collection.")
        self.run_preprocessing_phase_action.triggered.connect(self.run_preprocessing_phase_task)
        self.run_preprocessing_phase_action.setEnabled(False)
        self._all_actions.append(self.run_preprocessing_phase_action)

        self.load_and_run_default_matching_phase_action = QAction(
            "&Load and run default matching phase", self
        )
        self.load_and_run_default_matching_phase_action.setStatusTip(
            "Load the default matching phase and run it on the document collection."
        )
        self.load_and_run_default_matching_phase_action.setIcon(QIcon("aset_ui/resources/run_run.svg"))
        self.load_and_run_default_matching_phase_action.setDisabled(True)
        self.load_and_run_default_matching_phase_action.triggered.connect(
            self.load_and_run_default_preprocessing_phase_task
        )
        self._all_actions.append(self.load_and_run_default_matching_phase_action)

        self.load_matching_phase_from_config_action = QAction("&Load matching phase", self)
        self.load_matching_phase_from_config_action.setStatusTip(
            "Load a matching phase from a .json configuration file."
        )
        self.load_matching_phase_from_config_action.triggered.connect(self.load_matching_phase_from_config_task)
        self._all_actions.append(self.load_matching_phase_from_config_action)

        self.save_matching_phase_to_config_action = QAction("&Save matching phase", self)
        self.save_matching_phase_to_config_action.setStatusTip("Save the matching phase in a .json configuration file.")
        self.save_matching_phase_to_config_action.triggered.connect(self.save_matching_phase_to_config_task)
        self.save_matching_phase_to_config_action.setEnabled(False)
        self._all_actions.append(self.save_matching_phase_to_config_action)

        self.run_matching_phase_action = QAction("Run matching phase", self)
        self.run_matching_phase_action.setIcon(QIcon("aset_ui/resources/run.svg"))
        self.run_matching_phase_action.setStatusTip("Run the matching phase on the document collection.")
        self.run_matching_phase_action.triggered.connect(self.run_matching_phase_task)
        self.run_matching_phase_action.setEnabled(False)
        self._all_actions.append(self.run_matching_phase_action)

        self.enable_collect_statistics_action = QAction("&Enable statistics", self)
        self.enable_collect_statistics_action.setIcon(QIcon("aset_ui/resources/statistics.svg"))
        self.enable_collect_statistics_action.setStatusTip("Enable collecting statistics.")
        self.enable_collect_statistics_action.triggered.connect(self.enable_collect_statistics_task)
        self.enable_collect_statistics_action.setEnabled(False)
        self._all_actions.append(self.enable_collect_statistics_action)

        self.disable_collect_statistics_action = QAction("&Disable statistics", self)
        self.disable_collect_statistics_action.setIcon(QIcon("aset_ui/resources/statistics_incorrect.svg"))
        self.disable_collect_statistics_action.setStatusTip("Disable collecting statistics.")
        self.disable_collect_statistics_action.triggered.connect(self.disable_collect_statistics_task)
        self._all_actions.append(self.disable_collect_statistics_action)

        self.save_statistics_to_json_action = QAction("&Save statistics", self)
        self.save_statistics_to_json_action.setIcon(QIcon("aset_ui/resources/statistics_save.svg"))
        self.save_statistics_to_json_action.setStatusTip("Save the statistics to a .json file.")
        self.save_statistics_to_json_action.triggered.connect(self.save_statistics_to_json_task)
        self.save_statistics_to_json_action.setEnabled(False)
        self._all_actions.append(self.save_statistics_to_json_action)

        # set up the menu bar
        self.menubar = self.menuBar()
        self.menubar.setFont(MENU_FONT)

        self.file_menu = self.menubar.addMenu("&File")
        self.file_menu.setFont(MENU_FONT)
        self.file_menu.addAction(self.exit_action)

        self.document_base_menu = self.menubar.addMenu("&Document Base")
        self.document_base_menu.setFont(MENU_FONT)
        self.document_base_menu.addAction(self.show_document_base_creator_widget_action)
        self.document_base_menu.addSeparator()
        self.document_base_menu.addAction(self.load_document_base_from_bson_action)
        self.document_base_menu.addAction(self.save_document_base_to_bson_action)
        self.document_base_menu.addSeparator()
        self.document_base_menu.addAction(self.save_table_to_csv_action)
        self.document_base_menu.addSeparator()
        self.document_base_menu.addAction(self.add_attribute_action)
        self.document_base_menu.addAction(self.remove_attribute_action)
        self.document_base_menu.addSeparator()
        self.document_base_menu.addAction(self.forget_matches_for_attribute_action)
        self.document_base_menu.addAction(self.forget_matches_action)

        self.preprocessing_menu = self.menubar.addMenu("&Preprocessing")
        self.preprocessing_menu.setFont(MENU_FONT)
        self.preprocessing_menu.addAction(self.load_and_run_default_preprocessing_phase_action)
        self.preprocessing_menu.addSeparator()
        self.preprocessing_menu.addAction(self.load_preprocessing_phase_from_config_action)
        self.preprocessing_menu.addAction(self.save_preprocessing_phase_to_config_action)
        self.preprocessing_menu.addSeparator()
        self.preprocessing_menu.addAction(self.run_preprocessing_phase_action)

        self.matching_menu = self.menubar.addMenu("&Matching")
        self.matching_menu.setFont(MENU_FONT)
        self.matching_menu.addAction(self.load_and_run_default_matching_phase_action)
        self.matching_menu.addSeparator()
        self.matching_menu.addAction(self.load_matching_phase_from_config_action)
        self.matching_menu.addAction(self.save_matching_phase_to_config_action)
        self.matching_menu.addSeparator()
        self.matching_menu.addAction(self.run_matching_phase_action)

        self.statistics_menu = self.menubar.addMenu("&Statistics")
        self.statistics_menu.setFont(MENU_FONT)
        self.statistics_menu.addAction(self.enable_collect_statistics_action)
        self.statistics_menu.addAction(self.disable_collect_statistics_action)
        self.statistics_menu.addSeparator()
        self.statistics_menu.addAction(self.save_statistics_to_json_action)

        # start menu
        self.start_menu_widget = QWidget()
        self.start_menu_layout = QVBoxLayout(self.start_menu_widget)
        self.start_menu_layout.setContentsMargins(0, 0, 0, 0)
        self.start_menu_layout.setSpacing(30)
        self.start_menu_layout.setAlignment(Qt.AlignmentFlag.AlignTop)
        self.start_menu_widget.setMaximumWidth(400)

        self.start_menu_header = QLabel("Welcome to ASET!")
        self.start_menu_header.setFont(HEADER_FONT)
        self.start_menu_layout.addWidget(self.start_menu_header)

        self.start_menu_create_new_document_base_widget = QWidget()
        self.start_menu_create_new_document_base_layout = QVBoxLayout(self.start_menu_create_new_document_base_widget)
        self.start_menu_create_new_document_base_layout.setContentsMargins(0, 0, 0, 0)
        self.start_menu_create_new_document_base_layout.setSpacing(10)
        self.start_menu_layout.addWidget(self.start_menu_create_new_document_base_widget)

        self.start_menu_create_new_document_base_subheader = QLabel("Create a new document base.")
        self.start_menu_create_new_document_base_subheader.setFont(SUBHEADER_FONT)
        self.start_menu_create_new_document_base_layout.addWidget(self.start_menu_create_new_document_base_subheader)

        self.start_menu_create_new_document_base_wrapper_widget = QWidget()
        self.start_menu_create_new_document_base_wrapper_layout = QHBoxLayout(
            self.start_menu_create_new_document_base_wrapper_widget)
        self.start_menu_create_new_document_base_wrapper_layout.setContentsMargins(0, 0, 0, 0)
        self.start_menu_create_new_document_base_wrapper_layout.setSpacing(20)
        self.start_menu_create_new_document_base_wrapper_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
        self.start_menu_create_new_document_base_layout.addWidget(
            self.start_menu_create_new_document_base_wrapper_widget)

        self.start_menu_create_document_base_button = QPushButton()
        self.start_menu_create_document_base_button.setFixedHeight(45)
        self.start_menu_create_document_base_button.setFixedWidth(45)
        self.start_menu_create_document_base_button.setIcon(QIcon("aset_ui/resources/two_documents.svg"))
        self.start_menu_create_document_base_button.clicked.connect(self.show_document_base_creator_widget_task)
        self.start_menu_create_new_document_base_wrapper_layout.addWidget(self.start_menu_create_document_base_button)

        self.start_menu_create_document_base_label = QLabel(
            "Create a new document base from a directory\nof .txt files and a list of attribute names.")
        self.start_menu_create_document_base_label.setFont(LABEL_FONT)
        self.start_menu_create_new_document_base_wrapper_layout.addWidget(self.start_menu_create_document_base_label)

        self.start_menu_load_document_base_widget = QWidget()
        self.start_menu_load_document_base_layout = QVBoxLayout(self.start_menu_load_document_base_widget)
        self.start_menu_load_document_base_layout.setContentsMargins(0, 0, 0, 0)
        self.start_menu_load_document_base_layout.setSpacing(10)
        self.start_menu_layout.addWidget(self.start_menu_load_document_base_widget)

        self.start_menu_load_document_base_subheader = QLabel("Load an existing document base.")
        self.start_menu_load_document_base_subheader.setFont(SUBHEADER_FONT)
        self.start_menu_load_document_base_layout.addWidget(self.start_menu_load_document_base_subheader)

        self.start_menu_load_document_base_wrapper_widget = QWidget()
        self.start_menu_load_document_base_wrapper_layout = QHBoxLayout(
            self.start_menu_load_document_base_wrapper_widget)
        self.start_menu_load_document_base_wrapper_layout.setContentsMargins(0, 0, 0, 0)
        self.start_menu_load_document_base_wrapper_layout.setSpacing(20)
        self.start_menu_load_document_base_wrapper_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
        self.start_menu_load_document_base_layout.addWidget(self.start_menu_load_document_base_wrapper_widget)

        self.start_menu_load_document_base_button = QPushButton()
        self.start_menu_load_document_base_button.setFixedHeight(45)
        self.start_menu_load_document_base_button.setFixedWidth(45)
        self.start_menu_load_document_base_button.setIcon(QIcon("aset_ui/resources/folder.svg"))
        self.start_menu_load_document_base_button.clicked.connect(self.load_document_base_from_bson_task)
        self.start_menu_load_document_base_wrapper_layout.addWidget(self.start_menu_load_document_base_button)

        self.start_menu_load_document_base_label = QLabel("Load an existing document base\nfrom a .bson file.")
        self.start_menu_load_document_base_label.setFont(LABEL_FONT)
        self.start_menu_load_document_base_wrapper_layout.addWidget(self.start_menu_load_document_base_label)

        # main UI
        self.central_widget = QWidget(self)
        self.central_widget_layout = QHBoxLayout(self.central_widget)
        self.central_widget_layout.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.setCentralWidget(self.central_widget)

        self.document_base_creator_widget = DocumentBaseCreatorWidget(self)
        self.document_base_viewer_widget = DocumentBaseViewerWidget(self)
        self.interactive_matching_widget = InteractiveMatchingWidget(self)

        self.document_base_creator_widget.hide()
        self.document_base_viewer_widget.hide()
        self.interactive_matching_widget.hide()
        self.central_widget_layout.addWidget(self.start_menu_widget)
        self.central_widget_layout.update()

        self.resize(1400, 800)
        self.show()

        logger.info("Initialized MainWindow.")