예제 #1
0
def test_initialization():
    """A directly constructed ObjectElement is both an Element and an ObjectElement."""
    element_under_test = ObjectElement(
        level=-1,
        pointer="",
        tag=gedcom.tags.GEDCOM_TAG_OBJECT,
        value="",
    )

    # Check the base class first, then the concrete class.
    for expected_type in (Element, ObjectElement):
        assert isinstance(element_under_test, expected_type)
예제 #2
0
    def new_child_element(self, tag, pointer="", value=""):
        """Build a child one level below this element, attach it and return it

        The concrete class depends on `tag`: the family, file, individual and
        object tags map to their dedicated element classes, any other tag
        produces a plain `Element`.

        :type tag: str
        :type pointer: str
        :type value: str
        :rtype: Element
        """
        # NOTE(review): imported inside the method rather than at module level,
        # presumably to avoid circular imports between element modules - confirm.
        from gedcom.element.family import FamilyElement
        from gedcom.element.file import FileElement
        from gedcom.element.individual import IndividualElement
        from gedcom.element.object import ObjectElement

        # Ordered (tag, class) pairs; the first tag equal to `tag` wins.
        specialised_classes = (
            (gedcom.tags.GEDCOM_TAG_FAMILY, FamilyElement),
            (gedcom.tags.GEDCOM_TAG_FILE, FileElement),
            (gedcom.tags.GEDCOM_TAG_INDIVIDUAL, IndividualElement),
            (gedcom.tags.GEDCOM_TAG_OBJECT, ObjectElement),
        )

        element_class = Element
        for known_tag, candidate_class in specialised_classes:
            if tag == known_tag:
                element_class = candidate_class
                break

        # The child sits one level deeper and reuses this element's line terminator.
        child_element = element_class(self.get_level() + 1, pointer, tag, value, self.__crlf)
        self.add_child_element(child_element)
        return child_element
예제 #3
0
    def __parse_line(line_number, line, last_element, strict=True):
        """Parse a line from a GEDCOM 5.5 formatted document and attach it to the element tree

        Each line should have the following (bracketed items optional):
        level + ' ' + [pointer + ' ' +] tag + [' ' + line_value]

        With `strict` set, a line that does not have this shape (including a
        missing line terminator) raises. Without it, two quirks are tolerated:

        * a line of the right shape but without a line terminator (which could
          be the last line of the document) is accepted and given a newline
          terminator;
        * a line without level and tag (e.g. the remainder of a text value that
          contained a line break) is turned into a continuation of
          `last_element`: if `last_element` is itself a CONT/CONC line the new
          element reuses its level and tag, otherwise it becomes a CONC one
          level deeper. The whole line text is kept as the value.

        The new element is added as a child of the closest ancestor of
        `last_element` (or `last_element` itself) whose level is below the new
        element's level.

        :param line_number: number of the line, only used in error messages
        :type line_number: int
        :param line: raw text of the line, normally including its terminator
        :type line: str
        :param last_element: element created for the previous line
        :type last_element: Element
        :param strict: whether to reject lines that violate the format
        :type strict: bool

        :raises GedcomFormatViolationError: if `strict` is set and the line is
            malformed, or (in either mode) if the level is more than one higher
            than the level of `last_element`
        :rtype: Element
        """

        # Level must start with non-negative int, no leading zeros.
        level_regex = '^(0|[1-9]+[0-9]*) '

        # Pointer optional, if it exists it must be flanked by `@`
        pointer_regex = '(@[^@]+@ |)'

        # Tag must be an alphanumeric string
        tag_regex = '([A-Za-z0-9_]+)'

        # Value optional, consists of anything after a space to end of line
        value_regex = '( [^\n\r]*|)'

        # End of line defined by `\n` or `\r`
        end_of_line_regex = '([\r\n]{1,2})'

        # Complete regex
        gedcom_line_regex = level_regex + pointer_regex + tag_regex + value_regex + end_of_line_regex
        regex_match = regex.match(gedcom_line_regex, line)

        if regex_match is not None:
            line_parts = regex_match.groups()

            level = int(line_parts[0])
            pointer = line_parts[1].rstrip(' ')
            tag = line_parts[2]
            # The value group starts with its separator space; drop it.
            value = line_parts[3][1:]
            crlf = line_parts[4]
        elif strict:
            error_message = (
                "Line %d of document violates GEDCOM format 5.5" %
                line_number +
                "\nSee: https://chronoplexsoftware.com/gedcomvalidator/gedcom/gedcom-5.5.pdf"
            )
            raise GedcomFormatViolationError(error_message)
        else:
            # Quirk check - see if this is a line without a CRLF (which could be the last line)
            last_line_regex = level_regex + pointer_regex + tag_regex + value_regex
            regex_match = regex.match(last_line_regex, line)
            if regex_match is not None:
                line_parts = regex_match.groups()

                level = int(line_parts[0])
                pointer = line_parts[1].rstrip(' ')
                tag = line_parts[2]
                value = line_parts[3][1:]
                crlf = '\n'
            else:
                # Quirk check - Sometimes a gedcom has a text field with a CR.
                # This creates a line without the standard level and pointer.
                # If this is detected then turn it into a CONC or CONT.
                #
                # The terminator is optional here so that a malformed last line
                # without one is still handled instead of crashing on a failed
                # match; both groups may be empty, so this pattern always matches.
                cont_line_regex = '([^\n\r]*)' + '([\r\n]{1,2}|)'
                line_parts = regex.match(cont_line_regex, line).groups()

                level = last_element.get_level()
                tag = last_element.get_tag()
                # No pointer: use the empty string, as the other branches do.
                pointer = ''
                # Unlike the value group above there is no separator space in
                # front of this text, so it is kept whole.
                value = line_parts[0]
                crlf = line_parts[1] or '\n'
                if tag != gedcom.tags.GEDCOM_TAG_CONTINUED and tag != gedcom.tags.GEDCOM_TAG_CONCATENATION:
                    # Increment level and change this line to a CONC
                    level += 1
                    tag = gedcom.tags.GEDCOM_TAG_CONCATENATION

        # Check level: should never be more than one higher than previous line.
        if level > last_element.get_level() + 1:
            error_message = (
                "Line %d of document violates GEDCOM format 5.5" % line_number
                +
                "\nLines must be no more than one level higher than previous line."
                +
                "\nSee: https://chronoplexsoftware.com/gedcomvalidator/gedcom/gedcom-5.5.pdf"
            )
            raise GedcomFormatViolationError(error_message)

        # Pick the element class for this tag; all of them take the same arguments.
        if tag == gedcom.tags.GEDCOM_TAG_INDIVIDUAL:
            element_class = IndividualElement
        elif tag == gedcom.tags.GEDCOM_TAG_FAMILY:
            element_class = FamilyElement
        elif tag == gedcom.tags.GEDCOM_TAG_FILE:
            element_class = FileElement
        elif tag == gedcom.tags.GEDCOM_TAG_OBJECT:
            element_class = ObjectElement
        else:
            element_class = Element

        element = element_class(level, pointer, tag, value, crlf, multi_line=False)

        # Start with last element as parent, back up if necessary.
        parent_element = last_element

        while parent_element.get_level() > level - 1:
            parent_element = parent_element.get_parent_element()

        # Add child to parent & parent to child.
        parent_element.add_child_element(element)

        return element