def RemoveExtensionsAndFlags(self, node_elements: list):
    """
    Strip word extensions from node elements and drop flag elements.

    Membership tests are delegated to the helpers isInStr / isNotInStr:
    elements reported to hold a ':' are skipped, elements holding a '-'
    (and no ':') are cut at their last '-' and kept only if something
    remains, and every other element is kept unless it is empty.
        :param node_elements:list: split elements defining the node sequence
        :returns: list of kept elements (empty when node_elements does not
                  pass isNotNone); None after a failure was printed
    """
    try:
        kept = []
        if not isNotNone(node_elements):
            return kept

        for element in node_elements:
            has_dash = isInStr("-", element)

            if not has_dash and not isInStr(":", element):
                # Plain element: keep it unless it is empty.
                if len(element) > 0:
                    kept.append(element)
                continue

            if has_dash and isNotInStr(":", element):
                # Cut the word extension off at the last '-'.
                stem = element[:element.rfind('-')]
                if len(stem) > 0:
                    kept.append(stem)
            # Everything else is treated as a flag element and skipped.

        return kept
    except Exception as ex:
        template = "An exception of type {0} occurred in [TParser.RemoveExtensionsAndFlags]. Arguments:\n{1!r}"
        print(template.format(type(ex).__name__, ex.args))
        return None
def FinalFormatter(self, in_context: str):
    """
    Clean up a raw AMR semantic string whose spacing format was removed.

    The string is split at the opening sign (self.node_parenthesis[0]); each
    piece gets the opening sign back, is passed through AddLeadingSpace with
    the current depth and then through HandleAdditionalContent. The depth
    grows by one per piece and shrinks by the number of closing signs
    (self.node_parenthesis[1]) counted in that piece. The pieces are joined
    with newlines, a ')' is appended and every '-' is replaced by a space.
        :param in_context:str: a raw spacing format removed AMR semantic string
        :returns: the formatted string; None after a failure was printed
    """
    try:
        opening_sign = self.node_parenthesis[0]
        rows = []
        level = -1

        for piece in in_context.split(opening_sign):
            level += 1
            row = self.AddLeadingSpace(level, (opening_sign + piece))

            closing_sign = self.node_parenthesis[1]
            if isInStr(closing_sign, row):
                # Each closing sign ends one nesting level for later rows.
                level -= self.CountSignOccurence(row, closing_sign)

            rows.append(self.HandleAdditionalContent(row))

        return ('\n'.join(rows) + ')').replace('-', ' ')
    except Exception as ex:
        template = "An exception of type {0} occurred in [AMRCleaner.FinalFormatter]. Arguments:\n{1!r}"
        print(template.format(type(ex).__name__, ex.args))
        return None
def DeleteFlags(self, in_context: str):
    """
    Delete AMR flags and keep only the information they were attached to.

    When isInStr reports self.constants.COLON in the input, every match of
    self.constants.FLAG_REGEX is removed; otherwise the input is returned
    unchanged.
        :param in_context:str: string with at least one AMR flag
        :returns: the string without flags; None after a failure was printed
    """
    try:
        if isInStr(self.constants.COLON, in_context):
            in_context = re.sub(self.constants.FLAG_REGEX, '', in_context)
        return in_context
    except Exception as ex:
        # The class is reported as AMRCleaner, matching the sibling methods
        # (the message previously carried the misspelling "ARMCleaner").
        template = "An exception of type {0} occurred in [AMRCleaner.DeleteFlags]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
        return None
def HandleAdditionalContent(self, in_context: str):
    """
    Replace or remove problematic additional content in an AMR line fragment.

    Depending on what isInStr reports for the fragment, flags are deleted
    (self.constants.COLON present), the polite sign is replaced ('+' present)
    and the polarity sign is replaced followed by word extension removal
    ('-' present). DeleteUnusedSigns is applied last in every case.
        :param in_context:str: a AMR line fragment with a node and maybe additional context
        :returns: the processed fragment; None after a failure was printed
    """
    try:
        fragment = in_context

        if isInStr(self.constants.COLON, fragment):
            fragment = self.DeleteFlags(fragment)

        if isInStr('+', fragment):
            fragment = self.ReplacePoliteSign(fragment)

        if isInStr('-', fragment):
            fragment = self.DeleteWordExtension(self.ReplacePolaritySign(fragment))

        return self.DeleteUnusedSigns(fragment)
    except Exception as ex:
        template = "An exception of type {0} occurred in [AMRCleaner.HandleAdditionalContent]. Arguments:\n{1!r}"
        print(template.format(type(ex).__name__, ex.args))
        return None
def DeleteWordExtension(self, in_context: str):
    """
    Delete word extensions from node content in an AMR semantic line fragment.

    When isInStr reports a '-' in the input, every match of
    self.constants.EXTENSION_NUMBER_REGEX is removed; otherwise the input is
    returned unchanged.
        :param in_context:str: a semantic line fragment
        :returns: the fragment without word extensions; None after a failure
                  was printed
    """
    try:
        if isInStr('-', in_context):
            in_context = re.sub(self.constants.EXTENSION_NUMBER_REGEX, '', in_context)
        return in_context
    # Both diagnostics report the class as AMRCleaner, matching the sibling
    # methods (they previously carried the misspelling "ARMCleaner").
    except ValueError:
        print("ERR: No content passed to [AMRCleaner.DeleteWordExtension].")
        return None
    except Exception as ex:
        template = "An exception of type {0} occurred in [AMRCleaner.DeleteWordExtension]. Arguments:\n{1!r}"
        message = template.format(type(ex).__name__, ex.args)
        print(message)
        return None
def CleanNodeSequence(self, sequence: str):
    """
    Clean a node sequence so that only label and content remain.

    The raw node sequence is obtained from ExtractRawNodeSequence. When
    isInStr reports a space in it, it is split at spaces, filtered through
    RemoveExtensionsAndFlags (which also cuts off word extensions) and joined
    with single spaces again; otherwise the raw sequence is returned as is.
        :param sequence:str: node sequence
        :returns: the cleaned node sequence; None after a failure was printed
    """
    try:
        raw_sequence = self.ExtractRawNodeSequence(sequence)
        if not isInStr(' ', raw_sequence):
            return raw_sequence

        parts = raw_sequence.split(' ')
        return ' '.join(self.RemoveExtensionsAndFlags(parts))
    except Exception as ex:
        template = "An exception of type {0} occurred in [TParser.CleanNodeSequence]. Arguments:\n{1!r}"
        print(template.format(type(ex).__name__, ex.args))
        return None