def preprocessing(self) -> bool:
        """Preprocess a monster, and set important object variables.

        This function preprocesses every monster dumped from the OSRS cache. The
        cache entry and name for ``self.monster_id`` are loaded, the wikitext is
        looked up (first by ID in ``all_wikitext_processed``, then by name in
        ``all_wikitext_raw``), and the ``infobox monster`` template is extracted
        so that the wiki properties can be later processed and populated.

        :return: True when both the wikitext and the infobox template were
            found, otherwise False.
        :raises KeyError: If the monster ID is not present in the cache data.
        """
        # Set monster ID variables
        self.monster_id_int = int(self.monster_id)  # Monster ID number as an integer
        self.monster_id_str = str(self.monster_id)  # Monster ID number as a string

        # Load monster dictionary of cache data based on monster ID
        # This raw cache data is the baseline information about the specific monster
        self.monster_cache_data = self.all_monster_cache_data[self.monster_id_str]

        # Set monster name variable (directly from the cache dump)
        self.monster_name = self.monster_cache_data["name"]

        # Log monster
        logging.debug(f"======================= {self.monster_id_str} {self.monster_name}")
        logging.debug("preprocessing: using the following cache data:")
        logging.debug(self.monster_cache_data)

        # Reset the lookup results so a failed lookup leaves a known state
        self.monster_wikitext = None
        self.wikitext_found_using = None
        self.has_infobox = False

        # Try to find the wiki data using direct ID number search, then fall
        # back to a direct name search. Empty wikitext is treated as missing.
        wikitext = self.all_wikitext_processed.get(self.monster_id_str)
        if wikitext:
            self.monster_wikitext = wikitext
            self.wikitext_found_using = "id"
        else:
            wikitext = self.all_wikitext_raw.get(self.monster_name)
            if wikitext:
                self.monster_wikitext = wikitext
                self.wikitext_found_using = "name"

        logging.debug(f"preprocessing: self.monster_wikitext found using: {self.wikitext_found_using}")

        # Without wikitext there is nothing to parse
        if not self.monster_wikitext:
            logging.critical("CRITICAL: Could not find monster_wikitext by id or name...")
            return False

        # Parse the infobox monster
        infobox_parser = WikitextTemplateParser(self.monster_wikitext)

        # Try extract infobox for monster
        self.has_infobox = infobox_parser.extract_infobox("infobox monster")
        if not self.has_infobox:
            logging.critical("CRITICAL: Could not find template...")
            return False

        self.is_versioned = infobox_parser.determine_infobox_versions()
        logging.debug(f"preprocessing: Is the infobox versioned: {self.is_versioned}")
        self.versioned_ids = infobox_parser.extract_infobox_ids()
        logging.debug(f"preprocessing: Versioned IDs: {self.versioned_ids}")

        # Set the infobox version number. When this monster's ID has no entry
        # (or no IDs were extracted at all), fall back to the first version for
        # a versioned infobox and to an empty string (no version number)
        # otherwise, so the attribute is always assigned before it is read.
        default_version = "1" if self.is_versioned else ""
        try:
            if self.versioned_ids:
                self.infobox_version_number = self.versioned_ids[self.monster_id_int]
            else:
                self.infobox_version_number = default_version
        except KeyError:
            self.infobox_version_number = default_version
        logging.debug(f"preprocessing: infobox_version_number: {self.infobox_version_number}")

        # Set the template
        self.template = infobox_parser.template

        return True
Exemple #2
0
    def preprocessing(self) -> Dict:
        """Preprocess an item, and set important object variables.

        This function preprocesses every item dumped from the OSRS cache. The
        cache entry, name and linked item ID are loaded for ``self.item_id``.
        Noted and placeholder items are processed using their linked item ID.
        The wikitext (from the OSRS wiki) is then found by looking up ID, linked
        ID, and name, in that order. Finally the `Infobox Item` or `Infobox Pet`
        is extracted so that the wiki properties can be later processed and
        populated.

        :return: A dictionary with two keys. ``status`` is True only when both
            the wikitext and an infobox template were found. ``code`` is one of
            ``lookup_passed_id``, ``lookup_passed_linked_id``,
            ``lookup_passed_name``, ``no_item_wikitext`` or
            ``no_infobox_template``.
        :raises KeyError: If the item ID is not present in the cache data.
        """
        # Initialize dictionary to return preprocessing status
        status = {"status": False, "code": None}

        # Set item ID variables
        self.item_id_str = str(self.item_id)

        # Load item dictionary of cache data based on item ID
        # This raw cache data is the baseline information about the specific item
        self.item_cache_data = self.all_items_cache_data[self.item_id_str]

        # Set item name variable (directly from the cache dump)
        self.item_name = self.item_cache_data["name"]

        if self.verbose:
            print(f">>> {self.item_id_str} {self.item_name}")

        # Get the linked ID item value, if available
        self.linked_id_item_int = None
        self.linked_id_item_str = None
        linked_id = self.item_cache_data["linked_id_item"]
        if linked_id is not None:
            self.linked_id_item_int = int(linked_id)
            self.linked_id_item_str = str(linked_id)

        # Determine the ID number to extract
        # Noted and placeholder items should use the linked_id_item property
        # to fill in additional wiki data. int() is kept on the linked ID so a
        # noted/placeholder item without a linked ID fails loudly (TypeError).
        if self.item_cache_data["noted"] is True or self.item_cache_data["placeholder"] is True:
            item_id_to_process_int = int(self.linked_id_item_int)
        else:
            item_id_to_process_int = int(self.item_id)

        # Find the wiki page: direct ID, then linked ID, then direct name.
        # Empty wikitext is treated the same as a missing entry.
        wikitext = self.all_wikitext_processed.get(self.item_id_str)
        found_using, lookup_code = "id", "lookup_passed_id"
        if not wikitext:
            wikitext = self.all_wikitext_processed.get(self.linked_id_item_str)
            found_using, lookup_code = "linked_id", "lookup_passed_linked_id"
        if not wikitext:
            wikitext = self.all_wikitext_raw.get(self.item_name)
            found_using, lookup_code = "name", "lookup_passed_name"
        if not wikitext:
            status["code"] = "no_item_wikitext"
            return status

        self.item_wikitext = wikitext
        self.wikitext_found_using = found_using
        # Only the code is recorded here; the success flag is set once the
        # infobox has been extracted, so a missing template reports failure.
        status["code"] = lookup_code

        # Parse the infobox item
        infobox_parser = WikitextTemplateParser(self.item_wikitext)

        # Try extract infobox for item, then pet
        has_infobox = infobox_parser.extract_infobox("infobox item")
        if not has_infobox:
            has_infobox = infobox_parser.extract_infobox("infobox pet")
            if not has_infobox:
                self.template = None
                status["code"] = "no_infobox_template"
                return status

        is_versioned = infobox_parser.determine_infobox_versions()
        versioned_ids = infobox_parser.extract_infobox_ids()

        # Set the infobox version number. When the processed ID has no entry
        # (or no IDs were extracted at all), fall back to the first version for
        # a versioned infobox and to an empty string (no version number)
        # otherwise, so the attribute is always assigned.
        default_version = "1" if is_versioned else ""
        try:
            if versioned_ids:
                self.infobox_version_number = versioned_ids[item_id_to_process_int]
            else:
                self.infobox_version_number = default_version
        except KeyError:
            self.infobox_version_number = default_version

        # Set the template
        self.template = infobox_parser.template

        status["status"] = True
        return status
Exemple #3
0
    def preprocessing(self) -> Dict:
        """Preprocess an item, and set important object variables.

        This function preprocesses every item dumped from the OSRS cache. The
        cache entry, name and linked item ID are loaded for ``self.item_id``.
        Noted and placeholder items are processed using their linked item ID.
        The wikitext (from the OSRS wiki) is found by looking up ID, linked ID
        and name; items listed in ``invalid_items_data`` additionally try their
        normalized name. Finally the `Infobox Item` or `Infobox Pet` is
        extracted so that the wiki properties can be later processed and
        populated.

        :return: A dictionary with two keys. ``status`` is True only when an
            infobox template was extracted. ``code`` is one of
            ``lookup_passed_id``, ``lookup_passed_linked_id``,
            ``lookup_passed_name``, ``lookup_passed_normalized_name``,
            ``lookup_failed``, ``no_item_wikitext`` or ``no_infobox_template``.
        :raises KeyError: If the item ID is not present in the cache data.
        """
        # Initialize dictionary to return preprocessing status
        return_status = {"status": False, "code": None}

        # Set item ID variables
        self.item_id_int = int(self.item_id)  # Item ID number as an integer
        self.item_id_str = str(self.item_id)  # Item ID number as a string

        # Load item dictionary of cache data based on item ID
        # This raw cache data is the baseline information about the specific item
        self.item_cache_data = self.all_item_cache_data[self.item_id_str]

        # Set item name variable (directly from the cache dump)
        self.item_name = self.item_cache_data["name"]

        # Log and print item
        logging.debug(f"======================= {self.item_id_str} {self.item_name}")
        if self.verbose:
            print(f"======================= {self.item_id_str} {self.item_name}")
        logging.debug("preprocessing: using the following cache data:")
        logging.debug(self.item_cache_data)

        # Get the linked ID item value, if available
        self.linked_id_item_int = None
        self.linked_id_item_str = None
        linked_id = self.item_cache_data["linked_id_item"]
        if linked_id is not None:
            self.linked_id_item_int = int(linked_id)
            self.linked_id_item_str = str(linked_id)
        logging.debug(f"preprocessing: Linked item ID: {self.linked_id_item_str}")

        # Determine the ID number to extract
        # Noted and placeholder items should use the linked_id_item property
        # to fill in additional wiki data. int() is kept on the linked ID so a
        # noted/placeholder item without a linked ID fails loudly (TypeError).
        self.item_id_to_process_int = None
        self.item_id_to_process_str = None
        if self.item_cache_data["noted"] is True or self.item_cache_data["placeholder"] is True:
            self.item_id_to_process_int = int(self.linked_id_item_int)
            self.item_id_to_process_str = str(self.linked_id_item_str)
        else:
            self.item_id_to_process_int = self.item_id_int
            self.item_id_to_process_str = self.item_id_str
        logging.debug(f"preprocessing: ID to process: {self.item_id_to_process_str}")

        # Find the wiki page
        # Reset the lookup state. is_invalid_item is reset here as well because
        # it is read below even when the item is not in invalid_items_data.
        self.item_wikitext = None
        self.wikitext_found_using = None
        self.has_infobox = False
        self.is_invalid_item = False

        # Try direct ID, then linked ID, then direct name.
        # Empty wikitext is treated the same as a missing entry.
        wikitext = self.all_wikitext_processed.get(self.item_id_str)
        found_using, lookup_code = "id", "lookup_passed_id"
        if not wikitext:
            wikitext = self.all_wikitext_processed.get(self.linked_id_item_str)
            found_using, lookup_code = "linked_id", "lookup_passed_linked_id"
        if not wikitext:
            wikitext = self.all_wikitext_raw.get(self.item_name)
            found_using, lookup_code = "name", "lookup_passed_name"
        if wikitext:
            self.item_wikitext = wikitext
            self.wikitext_found_using = found_using
            return_status["code"] = lookup_code

        if self.item_id_to_process_str in self.invalid_items_data:
            # The item is listed as invalid (not an actual item, has no wiki
            # page etc.), handle it accordingly. This runs regardless of the
            # lookups above and overrides their result code.
            self.is_invalid_item = True
            try:
                invalid_entry = self.invalid_items_data[self.item_id_to_process_str]
                self.status = invalid_entry["status"]
                self.normalized_name = invalid_entry["normalized_name"]
            except KeyError:
                # If either key is missing, both values are cleared
                self.status = None
                self.normalized_name = None
            logging.debug(
                f"preprocessing: Invalid item details: {self.is_invalid_item} {self.status} {self.normalized_name}"
            )

            # Try to find the wiki data using normalized_name search
            normalized_wikitext = self.all_wikitext_raw.get(self.normalized_name)
            if normalized_wikitext:
                self.item_wikitext = normalized_wikitext
                self.wikitext_found_using = "normalized_name"
                return_status["code"] = "lookup_passed_normalized_name"
            else:
                return_status["code"] = "lookup_failed"

        logging.debug(f"preprocessing: self.item_wikitext found using: {self.wikitext_found_using}")

        # If there is no wikitext, and the item is valid, raise a critical error
        if not self.item_wikitext and not self.is_invalid_item:
            logging.critical("CRITICAL: Could not find item_wikitext by id, linked_id_item or name...")
            return_status["code"] = "no_item_wikitext"
            return return_status

        # Parse the infobox item
        # NOTE(review): an invalid item with no wikitext reaches this point with
        # item_wikitext set to None - confirm the parser tolerates that input.
        infobox_parser = WikitextTemplateParser(self.item_wikitext)

        # Try extract infobox for item, then pet
        self.has_infobox = infobox_parser.extract_infobox("infobox item")
        if not self.has_infobox:
            self.has_infobox = infobox_parser.extract_infobox("infobox pet")
            if not self.has_infobox:
                self.template = None
                logging.critical("CRITICAL: Could not find template...")
                return_status["code"] = "no_infobox_template"
                return return_status

        self.is_versioned = infobox_parser.determine_infobox_versions()
        logging.debug(f"preprocessing: Is the infobox versioned: {self.is_versioned}")
        self.versioned_ids = infobox_parser.extract_infobox_ids()
        logging.debug(f"preprocessing: Versioned IDs: {self.versioned_ids}")

        # Set the infobox version number. When the processed ID has no entry
        # (or no IDs were extracted at all), fall back to the first version for
        # a versioned infobox and to an empty string (no version number)
        # otherwise, so the attribute is always assigned before it is read.
        default_version = "1" if self.is_versioned else ""
        try:
            if self.versioned_ids:
                self.infobox_version_number = self.versioned_ids[self.item_id_to_process_int]
            else:
                self.infobox_version_number = default_version
        except KeyError:
            self.infobox_version_number = default_version
        logging.debug(f"preprocessing: infobox_version_number: {self.infobox_version_number}")

        # Set the template
        self.template = infobox_parser.template

        return_status["status"] = True
        return return_status