def preprocessing(self):
    """Preprocess a monster and set important object variables.

    The cache entry for ``self.monster_id`` is loaded, the matching wikitext
    is located (first by ID in ``self.all_wikitext_processed``, then by name
    in ``self.all_wikitext_raw``), the ``infobox monster`` template is
    extracted, and the infobox version that applies to this monster ID is
    determined.

    Attributes written: ``monster_id_int``, ``monster_id_str``,
    ``monster_cache_data``, ``monster_name``, ``monster_wikitext``,
    ``wikitext_found_using``, ``has_infobox`` and, when an infobox is found,
    ``is_versioned``, ``versioned_ids``, ``infobox_version_number`` and
    ``template``.

    :return: ``True`` when the infobox template was extracted, ``False``
        when no wikitext or no ``infobox monster`` template could be found.
    :raises KeyError: Propagated from the cache lookups when the monster ID
        or its ``"name"`` entry is missing from the cache data.
    """
    # Set monster ID variables (integer and string forms)
    self.monster_id_int = int(self.monster_id)
    self.monster_id_str = str(self.monster_id)

    # Load the raw cache data for this monster; it is the baseline
    # information that the rest of the processing builds on
    self.monster_cache_data = self.all_monster_cache_data[self.monster_id_str]

    # Set monster name variable (directly from the cache dump)
    self.monster_name = self.monster_cache_data["name"]

    logging.debug(f"======================= {self.monster_id_str} {self.monster_name}")
    logging.debug("preprocessing: using the following cache data:")
    logging.debug(self.monster_cache_data)

    # Reset lookup results so a failed lookup never exposes stale values
    self.monster_wikitext = None
    self.wikitext_found_using = None
    self.has_infobox = False

    # Try to find the wiki data using direct ID number search
    wikitext = self.all_wikitext_processed.get(self.monster_id_str)
    if wikitext:
        self.monster_wikitext = wikitext
        self.wikitext_found_using = "id"
    else:
        # Try to find the wiki data using direct name search
        wikitext = self.all_wikitext_raw.get(self.monster_name)
        if wikitext:
            self.monster_wikitext = wikitext
            self.wikitext_found_using = "name"

    logging.debug(f"preprocessing: self.monster_wikitext found using: {self.wikitext_found_using}")

    # Without wikitext there is nothing to parse
    if not self.monster_wikitext:
        logging.critical("CRITICAL: Could not find monster_wikitext by id or name...")
        return False

    # Parse the wikitext and try to extract the monster infobox
    infobox_parser = WikitextTemplateParser(self.monster_wikitext)
    self.has_infobox = infobox_parser.extract_infobox("infobox monster")
    if not self.has_infobox:
        logging.critical("CRITICAL: Could not find template...")
        return False

    self.is_versioned = infobox_parser.determine_infobox_versions()
    logging.debug(f"preprocessing: Is the infobox versioned: {self.is_versioned}")

    self.versioned_ids = infobox_parser.extract_infobox_ids()
    logging.debug(f"preprocessing: Versioned IDs: {self.versioned_ids}")

    # Set the infobox version number. When this monster ID is not listed
    # (including when no IDs were extracted at all) fall back to the first
    # version for versioned infoboxes, or to the empty string otherwise.
    # The attribute must always be assigned: it is read just below.
    fallback_version = "1" if self.is_versioned else ""
    try:
        if self.versioned_ids:
            self.infobox_version_number = self.versioned_ids[self.monster_id_int]
        else:
            self.infobox_version_number = fallback_version
    except KeyError:
        self.infobox_version_number = fallback_version
    logging.debug(f"preprocessing: infobox_version_number: {self.infobox_version_number}")

    # Set the template
    self.template = infobox_parser.template
    return True
def preprocessing(self) -> Dict:
    """Preprocess an item and set important object variables.

    The cache entry for ``self.item_id`` is loaded and the item is checked
    for being a linked item (noted/placeholder), in which case the linked ID
    is used to select the infobox version. The wikitext is then looked up by
    ID, by linked ID and finally by name, and the ``infobox item`` (or,
    failing that, ``infobox pet``) template is extracted so the wiki
    properties can be processed later.

    Attributes written: ``item_id_str``, ``item_cache_data``, ``item_name``,
    ``linked_id_item_int``, ``linked_id_item_str``, ``item_wikitext``,
    ``wikitext_found_using`` and, depending on the outcome, ``template`` and
    ``infobox_version_number``.

    :return: A dictionary with a boolean ``"status"`` (``True`` only when an
        infobox template was extracted) and a ``"code"`` string: one of
        ``lookup_passed_id``, ``lookup_passed_linked_id``,
        ``lookup_passed_name``, ``no_item_wikitext`` or
        ``no_infobox_template``.
    :raises KeyError: Propagated from the cache lookups when the item ID or
        an expected cache property is missing.
    """
    # Initialize dictionary to return preprocessing status. "status" stays
    # False until the infobox template has actually been extracted.
    status = {"status": False, "code": None}

    # Set item ID variables
    self.item_id_str = str(self.item_id)

    # Load the raw cache data for this item; it is the baseline information
    # that the rest of the processing builds on
    self.item_cache_data = self.all_items_cache_data[self.item_id_str]

    # Set item name variable (directly from the cache dump)
    self.item_name = self.item_cache_data["name"]

    if self.verbose:
        print(f">>> {self.item_id_str} {self.item_name}")

    # Get the linked ID item value, if available
    self.linked_id_item_int = None
    self.linked_id_item_str = None
    linked_id_item = self.item_cache_data["linked_id_item"]
    if linked_id_item is not None:
        self.linked_id_item_int = int(linked_id_item)
        self.linked_id_item_str = str(linked_id_item)

    # Determine the ID number to extract. Noted and placeholder items use
    # the linked_id_item property to fill in additional wiki data; when the
    # cache gives no linked ID, fall back to the item's own ID rather than
    # failing on int(None).
    is_linked_item = (
        self.item_cache_data["noted"] is True
        or self.item_cache_data["placeholder"] is True
    )
    if is_linked_item and self.linked_id_item_int is not None:
        item_id_to_process_int = self.linked_id_item_int
    else:
        item_id_to_process_int = int(self.item_id)

    # Reset lookup results so a failed lookup never exposes stale values
    self.item_wikitext = None
    self.wikitext_found_using = None

    # Find the wiki page: by ID, then by linked ID, then by name. Each
    # lookup is only attempted when the previous one found nothing.
    wikitext = self.all_wikitext_processed.get(self.item_id_str)
    if wikitext:
        found_using = "id"
    else:
        wikitext = self.all_wikitext_processed.get(self.linked_id_item_str)
        if wikitext:
            found_using = "linked_id"
        else:
            wikitext = self.all_wikitext_raw.get(self.item_name)
            if wikitext:
                found_using = "name"
            else:
                status["code"] = "no_item_wikitext"
                return status

    self.item_wikitext = wikitext
    self.wikitext_found_using = found_using
    status["code"] = f"lookup_passed_{found_using}"

    # Parse the wikitext; try to extract the infobox for item, then pet
    infobox_parser = WikitextTemplateParser(self.item_wikitext)
    has_infobox = (
        infobox_parser.extract_infobox("infobox item")
        or infobox_parser.extract_infobox("infobox pet")
    )
    if not has_infobox:
        self.template = None
        status["code"] = "no_infobox_template"
        return status

    is_versioned = infobox_parser.determine_infobox_versions()
    versioned_ids = infobox_parser.extract_infobox_ids()

    # Set the infobox version number. When the ID is not listed (including
    # when no IDs were extracted at all) fall back to the first version for
    # versioned infoboxes, or to the empty string otherwise.
    fallback_version = "1" if is_versioned else ""
    try:
        if versioned_ids:
            self.infobox_version_number = versioned_ids[item_id_to_process_int]
        else:
            self.infobox_version_number = fallback_version
    except KeyError:
        self.infobox_version_number = fallback_version

    # Set the template
    self.template = infobox_parser.template

    status["status"] = True
    return status
def preprocessing(self) -> Dict:
    """Preprocess an item and set important object variables.

    The cache entry for ``self.item_id`` is loaded and the item is checked
    for being a linked item (noted/placeholder), in which case the linked ID
    is the one processed. The wikitext is looked up by ID, linked ID and
    name; items listed in ``self.invalid_items_data`` are additionally
    looked up by their normalized name. Finally the ``infobox item`` (or,
    failing that, ``infobox pet``) template is extracted so the wiki
    properties can be processed later.

    Attributes written: ``item_id_int``, ``item_id_str``,
    ``item_cache_data``, ``item_name``, ``linked_id_item_int``,
    ``linked_id_item_str``, ``item_id_to_process_int``,
    ``item_id_to_process_str``, ``item_wikitext``, ``wikitext_found_using``,
    ``has_infobox``, ``is_invalid_item`` and, depending on the outcome,
    ``status``, ``normalized_name``, ``is_versioned``, ``versioned_ids``,
    ``infobox_version_number`` and ``template``.

    :return: A dictionary with a boolean ``"status"`` (``True`` only when an
        infobox template was extracted) and a ``"code"`` string describing
        the lookup outcome.
    :raises KeyError: Propagated from the cache lookups when the item ID or
        an expected cache property is missing.
    """
    # Initialize dictionary to return preprocessing status
    return_status = {"status": False, "code": None}

    # Set item ID variables (integer and string forms)
    self.item_id_int = int(self.item_id)
    self.item_id_str = str(self.item_id)

    # Load the raw cache data for this item; it is the baseline information
    # that the rest of the processing builds on
    self.item_cache_data = self.all_item_cache_data[self.item_id_str]

    # Set item name variable (directly from the cache dump)
    self.item_name = self.item_cache_data["name"]

    # Log and print item
    logging.debug(f"======================= {self.item_id_str} {self.item_name}")
    if self.verbose:
        print(f"======================= {self.item_id_str} {self.item_name}")
    logging.debug("preprocessing: using the following cache data:")
    logging.debug(self.item_cache_data)

    # Get the linked ID item value, if available
    self.linked_id_item_int = None
    self.linked_id_item_str = None
    linked_id_item = self.item_cache_data["linked_id_item"]
    if linked_id_item is not None:
        self.linked_id_item_int = int(linked_id_item)
        self.linked_id_item_str = str(linked_id_item)
    logging.debug(f"preprocessing: Linked item ID: {self.linked_id_item_str}")

    # Determine the ID number to extract. Noted and placeholder items use
    # the linked_id_item property to fill in additional wiki data; when the
    # cache gives no linked ID, fall back to the item's own ID rather than
    # failing on int(None).
    is_linked_item = (
        self.item_cache_data["noted"] is True
        or self.item_cache_data["placeholder"] is True
    )
    if is_linked_item and self.linked_id_item_int is not None:
        self.item_id_to_process_int = self.linked_id_item_int
        self.item_id_to_process_str = self.linked_id_item_str
    else:
        self.item_id_to_process_int = int(self.item_id)
        self.item_id_to_process_str = str(self.item_id)
    logging.debug(f"preprocessing: ID to process: {self.item_id_to_process_str}")

    # Reset lookup results. is_invalid_item must be initialised here because
    # it is read further down even when the invalid-items branch is skipped.
    self.item_wikitext = None
    self.wikitext_found_using = None
    self.has_infobox = False
    self.is_invalid_item = False

    # Find the wiki page: by ID, then by linked ID, then by name. Each
    # lookup is only attempted when the previous one found nothing.
    wikitext = self.all_wikitext_processed.get(self.item_id_str)
    if wikitext:
        found_using = "id"
    else:
        wikitext = self.all_wikitext_processed.get(self.linked_id_item_str)
        if wikitext:
            found_using = "linked_id"
        else:
            wikitext = self.all_wikitext_raw.get(self.item_name)
            found_using = "name" if wikitext else None
    if found_using is not None:
        self.item_wikitext = wikitext
        self.wikitext_found_using = found_using
        return_status["code"] = f"lookup_passed_{found_using}"

    if self.item_id_to_process_str in self.invalid_items_data:
        # The item is listed as invalid (e.g. not an actual item, or it has
        # no wiki page of its own); handle it accordingly.
        # NOTE(review): this branch also runs when a lookup above already
        # succeeded, and then replaces the code with "lookup_failed" or
        # "lookup_passed_normalized_name" - confirm that is intended.
        self.is_invalid_item = True
        invalid_entry = self.invalid_items_data[self.item_id_to_process_str]
        try:
            self.status = invalid_entry["status"]
            self.normalized_name = invalid_entry["normalized_name"]
        except KeyError:
            self.status = None
            self.normalized_name = None
        logging.debug(
            f"preprocessing: Invalid item details: {self.is_invalid_item} {self.status} {self.normalized_name}"
        )

        # Try to find the wiki data using normalized_name search
        normalized_wikitext = self.all_wikitext_raw.get(self.normalized_name)
        if normalized_wikitext:
            self.item_wikitext = normalized_wikitext
            self.wikitext_found_using = "normalized_name"
            return_status["code"] = "lookup_passed_normalized_name"
        else:
            return_status["code"] = "lookup_failed"

    logging.debug(
        f"preprocessing: self.item_wikitext found using: {self.wikitext_found_using}"
    )

    # If there is no wikitext, and the item is valid, log a critical error.
    # NOTE(review): an invalid item without wikitext is not stopped here and
    # is handed to the parser below with item_wikitext set to None.
    if not self.item_wikitext and not self.is_invalid_item:
        logging.critical(
            "CRITICAL: Could not find item_wikitext by id, linked_id_item or name..."
        )
        return_status["code"] = "no_item_wikitext"
        return return_status

    # Parse the wikitext; try to extract the infobox for item, then pet
    infobox_parser = WikitextTemplateParser(self.item_wikitext)
    self.has_infobox = infobox_parser.extract_infobox("infobox item")
    if not self.has_infobox:
        self.has_infobox = infobox_parser.extract_infobox("infobox pet")
    if not self.has_infobox:
        self.template = None
        logging.critical("CRITICAL: Could not find template...")
        return_status["code"] = "no_infobox_template"
        return return_status

    self.is_versioned = infobox_parser.determine_infobox_versions()
    logging.debug(f"preprocessing: Is the infobox versioned: {self.is_versioned}")

    self.versioned_ids = infobox_parser.extract_infobox_ids()
    logging.debug(f"preprocessing: Versioned IDs: {self.versioned_ids}")

    # Set the infobox version number. When the ID is not listed (including
    # when no IDs were extracted at all) fall back to the first version for
    # versioned infoboxes, or to the empty string otherwise. The attribute
    # must always be assigned: it is read just below.
    fallback_version = "1" if self.is_versioned else ""
    try:
        if self.versioned_ids:
            self.infobox_version_number = self.versioned_ids[self.item_id_to_process_int]
        else:
            self.infobox_version_number = fallback_version
    except KeyError:
        self.infobox_version_number = fallback_version
    logging.debug(
        f"preprocessing: infobox_version_number: {self.infobox_version_number}"
    )

    # Set the template
    self.template = infobox_parser.template

    return_status["status"] = True
    return return_status