def test_initialization():
    """Check that a FamilyElement built with the FAM tag is both an Element and a FamilyElement."""
    element = FamilyElement(
        level=-1,
        pointer="",
        tag=gedcom.tags.GEDCOM_TAG_FAMILY,
        value="",
    )

    # Base class first, then the concrete class, in the same order as before.
    for expected_type in (Element, FamilyElement):
        assert isinstance(element, expected_type)
def new_child_element(self, tag, pointer="", value=""):
    """Build a child element one level below this element, attach it and return it

    The concrete class is picked from the tag: family, file, individual and
    object tags get their dedicated element classes, any other tag gets a
    plain `Element`.

    :type tag: str
    :type pointer: str
    :type value: str
    :rtype: Element
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid circular imports between the element modules).
    from gedcom.element.family import FamilyElement
    from gedcom.element.file import FileElement
    from gedcom.element.individual import IndividualElement
    from gedcom.element.object import ObjectElement

    # Ordered (tag, class) pairs; the first tag equal to `tag` wins.
    specialised_classes = (
        (gedcom.tags.GEDCOM_TAG_FAMILY, FamilyElement),
        (gedcom.tags.GEDCOM_TAG_FILE, FileElement),
        (gedcom.tags.GEDCOM_TAG_INDIVIDUAL, IndividualElement),
        (gedcom.tags.GEDCOM_TAG_OBJECT, ObjectElement),
    )
    element_class = next(
        (candidate for known_tag, candidate in specialised_classes if tag == known_tag),
        Element,
    )

    child_element = element_class(self.get_level() + 1, pointer, tag, value, self.__crlf)
    self.add_child_element(child_element)

    return child_element
def __parse_line(line_number, line, last_element, strict=True):
    """Parse a line from a GEDCOM 5.5 formatted document

    Each line should have the following (bracketed items optional):
    level + ' ' + [pointer + ' ' +] tag + [' ' + line_value]

    In strict mode a line that does not match this shape raises. In
    non-strict mode two quirks are tolerated:

    * a well-formed line with no line terminator (e.g. the last line of the
      file) is accepted and given `'\n'` as its terminator;
    * a line without level/tag is treated as text spilling over from the
      previous element: it stays a sibling when the previous element is
      already a CONT/CONC, otherwise it becomes a CONC child of it.

    The new element is attached to the proper ancestor of `last_element`
    before being returned.

    :type line_number: int
    :type line: str
    :type last_element: Element
    :type strict: bool
    :rtype: Element
    :raises GedcomFormatViolationError: if `strict` and the line is malformed,
        or if the level is more than one deeper than the previous line
    """

    # Level must start with non-negative int, no leading zeros.
    level_regex = '^(0|[1-9]+[0-9]*) '

    # Pointer optional, if it exists it must be flanked by `@`
    pointer_regex = '(@[^@]+@ |)'

    # Tag must be an alphanumeric string
    tag_regex = '([A-Za-z0-9_]+)'

    # Value optional, consists of anything after a space to end of line
    value_regex = '( [^\n\r]*|)'

    # End of line defined by `\n` or `\r`
    end_of_line_regex = '([\r\n]{1,2})'

    # Complete regex
    gedcom_line_regex = level_regex + pointer_regex + tag_regex + value_regex + end_of_line_regex
    regex_match = regex.match(gedcom_line_regex, line)

    if regex_match is None:
        if strict:
            error_message = (
                "Line %d of document violates GEDCOM format 5.5" % line_number
                + "\nSee: https://chronoplexsoftware.com/gedcomvalidator/gedcom/gedcom-5.5.pdf"
            )
            raise GedcomFormatViolationError(error_message)

        # Quirk check - see if this is a line without a CRLF (which could be the last line)
        last_line_regex = level_regex + pointer_regex + tag_regex + value_regex
        regex_match = regex.match(last_line_regex, line)

        if regex_match is not None:
            line_parts = regex_match.groups()

            level = int(line_parts[0])
            pointer = line_parts[1].rstrip(' ')
            tag = line_parts[2]
            value = line_parts[3][1:]  # drop the separating space captured by value_regex
            crlf = '\n'
        else:
            # Quirk check - Sometimes a gedcom has a text field with a CR.
            # This creates a line without the standard level and pointer.
            # If this is detected then turn it into a CONC or CONT.
            line_regex = '([^\n\r]*|)'

            # The terminator is optional here so that stray text on an
            # unterminated final line is still accepted; this pattern always
            # matches, so `regex_match` can never be None below.
            cont_line_regex = line_regex + end_of_line_regex + '?'
            regex_match = regex.match(cont_line_regex, line)
            line_parts = regex_match.groups()

            level = last_element.get_level()
            tag = last_element.get_tag()

            # A missing pointer is the empty string everywhere else (that is
            # what pointer_regex yields when no pointer is present).
            pointer = ''

            # NOTE(review): this slice drops the first character of the
            # continuation text. It mirrors the `[1:]` used to strip the
            # separator space on regular lines, but line_regex captures no
            # such separator - confirm whether this is intended.
            value = line_parts[0][1:]

            # Same default terminator as the "last line" quirk above.
            crlf = line_parts[1] or '\n'

            if tag != gedcom.tags.GEDCOM_TAG_CONTINUED and tag != gedcom.tags.GEDCOM_TAG_CONCATENATION:
                # Increment level and change this line to a CONC
                level += 1
                tag = gedcom.tags.GEDCOM_TAG_CONCATENATION
    else:
        line_parts = regex_match.groups()

        level = int(line_parts[0])
        pointer = line_parts[1].rstrip(' ')
        tag = line_parts[2]
        value = line_parts[3][1:]  # drop the separating space captured by value_regex
        crlf = line_parts[4]

    # Check level: should never be more than one higher than previous line.
    if level > last_element.get_level() + 1:
        error_message = (
            "Line %d of document violates GEDCOM format 5.5" % line_number
            + "\nLines must be no more than one level higher than previous line."
            + "\nSee: https://chronoplexsoftware.com/gedcomvalidator/gedcom/gedcom-5.5.pdf"
        )
        raise GedcomFormatViolationError(error_message)

    # Create element. Store in list and dict, create children and parents.
    if tag == gedcom.tags.GEDCOM_TAG_INDIVIDUAL:
        element = IndividualElement(level, pointer, tag, value, crlf, multi_line=False)
    elif tag == gedcom.tags.GEDCOM_TAG_FAMILY:
        element = FamilyElement(level, pointer, tag, value, crlf, multi_line=False)
    elif tag == gedcom.tags.GEDCOM_TAG_FILE:
        element = FileElement(level, pointer, tag, value, crlf, multi_line=False)
    elif tag == gedcom.tags.GEDCOM_TAG_OBJECT:
        element = ObjectElement(level, pointer, tag, value, crlf, multi_line=False)
    else:
        element = Element(level, pointer, tag, value, crlf, multi_line=False)

    # Start with last element as parent, back up if necessary.
    parent_element = last_element

    while parent_element.get_level() > level - 1:
        parent_element = parent_element.get_parent_element()

    # Add child to parent & parent to child.
    parent_element.add_child_element(element)

    return element
def create_family(ptr: str, level: int = 0, husb_ptrs: Tuple[str, ...] = None, wife_ptrs: Tuple[str, ...] = None,
                  child_ptrs: Tuple[str, ...] = None, marriage_place: str = None, marriage_date: str = None,
                  divorce_place: str = None, divorce_date: str = None):
    """Assemble a family element together with its member references and events.

    Children are appended in a fixed order: husbands, wives, children, then a
    marriage event and a divorce event. Each member reference is a plain
    `Element` one level below the family whose value is the member's pointer.
    An event is only added when at least one of its place/date arguments is
    not None; it is built by `create_event`.

    :param ptr: pointer of the family element itself
    :param level: level of the family element; its children use ``level + 1``
    :return: the populated `FamilyElement`
    """
    child_level = level + 1
    family_element = FamilyElement(level, ptr, tags.GEDCOM_TAG_FAMILY, '')

    # One (tag, pointers) pair per kind of member; None or empty groups are skipped.
    member_groups = (
        (tags.GEDCOM_TAG_HUSBAND, husb_ptrs),
        (tags.GEDCOM_TAG_WIFE, wife_ptrs),
        (tags.GEDCOM_TAG_CHILD, child_ptrs),
    )
    for member_tag, member_ptrs in member_groups:
        if not member_ptrs:
            continue
        for member_ptr in member_ptrs:
            reference = Element(child_level, '', member_tag, member_ptr)
            family_element.add_child_element(reference)

    has_marriage = not (marriage_date is None and marriage_place is None)
    if has_marriage:
        marriage = create_event(tags.GEDCOM_TAG_MARRIAGE, marriage_place, marriage_date, child_level)
        family_element.add_child_element(marriage)

    has_divorce = not (divorce_place is None and divorce_date is None)
    if has_divorce:
        divorce = create_event(tags.GEDCOM_TAG_DIVORCE, divorce_place, divorce_date, child_level)
        family_element.add_child_element(divorce)

    return family_element
def _parse_family(self, element: FamilyElement):
    """Register the family described by `element` in `self.families` and `self.graph`.

    A `Family` is created from the element's pointer, stored in
    `self.families` under its `ptr`, and added to `self.graph` as a node keyed
    by that same `ptr` with the `Family` attached as the `family` attribute.
    The pointer must not already be registered; this is checked with an assert.
    """
    record = Family(ptr=element.get_pointer())
    key = record.ptr

    # Each family pointer may only be registered once.
    assert key not in self.families, record

    self.families[key] = record
    self.graph.add_node(key, family=record)