Ejemplo n.º 1
0
def detect_encoding(tags: Iterable[DXFTag]) -> str:
    """ Detect text encoding from header variables $DWGCODEPAGE and $ACADVER
    out of a stream of DXFTag objects.

    Assuming a malformed DXF file:

    The header variables could reside outside of the HEADER section,
    an ENDSEC tag is not a reliable fact that no $DWGCODEPAGE or
    $ACADVER header variable will show up in the remaining tag stream.

    Worst case: DXF file without a $ACADVER var, and a $DWGCODEPAGE
    unequal to "ANSI_1252" at the end of the file. The whole tag stream
    has to be scanned in that case, and the code page found is still
    honoured.

    Args:
        tags: iterable of ``(code, value)`` pairs; the values of the
            inspected tags have to be bytes, because they are decoded here

    Returns:
        - ``'utf8'`` if a $ACADVER value >= ``const.DXF2007`` was found
        - else the encoding derived from $DWGCODEPAGE, if present
        - else ``const.DEFAULT_ENCODING``

    """
    encoding = None
    dxfversion = None
    next_tag = None  # name of the header variable whose value tag is expected

    for code, value in tags:
        if code == 9:
            if value == DWGCODEPAGE:
                next_tag = DWGCODEPAGE  # e.g. (3, "ANSI_1252")
            elif value == ACADVER:
                next_tag = ACADVER  # e.g. (1, "AC1012")
        elif code == 3 and next_tag == DWGCODEPAGE:
            encoding = toencoding(value.decode(const.DEFAULT_ENCODING))
            next_tag = None
        elif code == 1 and next_tag == ACADVER:
            dxfversion = value.decode(const.DEFAULT_ENCODING)
            next_tag = None

        if encoding and dxfversion:
            # Both variables are known, no need to scan the remaining tags.
            break

    # Evaluate whatever has been found: the former implementation returned
    # the default encoding if only one of the two variables was present and
    # thereby discarded a valid $DWGCODEPAGE or a $ACADVER >= DXF2007.
    if dxfversion and dxfversion >= const.DXF2007:
        return 'utf8'
    return encoding or const.DEFAULT_ENCODING
Ejemplo n.º 2
0
    def scan_params():
        """Scan the head of the enclosing ``data`` buffer for the header
        variables $ACADVER and $DWGCODEPAGE.

        Only the byte range 22 to 1024 is searched - an arbitrary limit.
        The defaults are DXF version 'AC1009' and encoding 'cp1252'.

        Returns:
            tuple ``(encoding, dxfversion)``

        """
        version = 'AC1009'
        # Offset +10 is the start index of the value for a 1-byte group code
        pos = data.find(b'$ACADVER', 22, 1024)
        if pos != -1:
            pos += 10
            if data[pos] != 65:  # not 'A' = 2-byte group code
                pos += 1
            version = data[pos:pos + 6].decode()

        if version >= 'AC1021':
            return 'utf8', version

        codec = 'cp1252'
        # Offset +14 is the start index of the value for a 1-byte group code
        pos = data.find(b'$DWGCODEPAGE', 22, 1024)
        if pos != -1:  # name schema is 'ANSI_xxxx'
            pos += 14
            if data[pos] != 65:  # not 'A' = 2-byte group code
                pos += 1
            # Skip the first 5 bytes, then advance to the terminating 0 byte
            stop = pos + 5
            while data[stop] != 0:
                stop += 1
            codec = toencoding(data[pos:stop].decode())

        return codec, version
Ejemplo n.º 3
0
    def __init__(self, tagger: Iterable['DXFTag']):
        """
        Build a new DXF drawing from a stream of DXF tags.

        The DXF structure is loaded completely, then the HEADER section is
        evaluated for DXF version, encoding and handle seed, the entity
        database is filled and the sections are created. Finally a DXF
        version dependent setup/repair step runs before the layouts are
        requested from the DXF factory.

        Args:
             tagger: generator or list of DXF tags as DXFTag() objects
        """

        def get_header(sections: 'SectionDict') -> 'SectionType':
            """ Create the HEADER section from the loaded section dict, the
            argument is ``None`` if the file has no HEADER section.
            """
            from .sections.header import HeaderSection
            header_entities = sections.get('HEADER', [None])[0]  # all tags in the first DXF structure entity
            return HeaderSection(header_entities)

        self.tracker = Tracker()
        self._dimension_renderer = DimensionRenderer()  # set DIMENSION rendering engine
        # The manager attributes stay None for DXF versions <= 'AC1009',
        # they are only assigned in the 'AC1009'+ branch below.
        self._groups = None  # type: GroupManager  # read only
        self._materials = None  # type: MaterialManager # read only
        self._mleader_styles = None  # type: MLeaderStyleManager # read only
        self._mline_styles = None  # type: MLineStyleManager # read only
        self._acad_compatible = True  # will generated DXF file compatible with AutoCAD
        self._acad_incompatibility_reason = set()  # avoid multiple warnings for same reason
        self.filename = None  # type: str # read/write
        self.entitydb = EntityDB()  # read only
        sections = load_dxf_structure(tagger)  # load complete DXF entity structure
        # create section HEADER
        header = get_header(sections)
        # a missing $ACADVER defaults to 'AC1009'
        self.dxfversion = header.get('$ACADVER', 'AC1009')  # type: str # read only
        self.dxffactory = dxffactory(self)  # read only, requires self.dxfversion
        # a missing $DWGCODEPAGE defaults to 'ANSI_1252'
        self.encoding = toencoding(header.get('$DWGCODEPAGE', 'ANSI_1252'))  # type: str # read/write
        # get handle seed, default is the string representation of the current handle generator
        seed = header.get('$HANDSEED', str(self.entitydb.handles))  # type: str
        # setup handles
        self.entitydb.handles.reset(seed)
        # store all necessary DXF entities in the drawing database
        fill_database(self.entitydb, sections, dxfversion=self.dxfversion)
        # create sections: TABLES, BLOCKS, ENTITIES, CLASSES, OBJECTS
        self.sections = Sections(sections, drawing=self, header=header)

        if self.dxfversion > 'AC1009':
            self.rootdict = self.objects.rootdict
            self.objects.setup_objects_management_tables(self.rootdict)  # create missing tables
            if self.dxfversion in ('AC1012', 'AC1014'):  # releases R13 and R14
                repair.upgrade_to_ac1015(self)
            # some applications don't setup properly the model and paper space layouts
            repair.setup_layouts(self)
            self._groups = self.objects.groups()
            self._materials = self.objects.materials()
            self._mleader_styles = self.objects.mleader_styles()
            self._mline_styles = self.objects.mline_styles()
        else:  # dxfversion <= 'AC1009' do cleanup work, before building layouts
            if self.dxfversion < 'AC1009':  # legacy DXF version
                repair.upgrade_to_ac1009(self)  # upgrade to DXF format AC1009 (DXF R12)
            repair.cleanup_r12(self)
            # ezdxf puts automatically handles into all entities added to the entities database
            # write R12 without handles, by setting $HANDLING = 0
            self.header['$HANDLING'] = 1  # write handles by default

        # the layouts are requested last, after the version dependent setup above
        self.layouts = self.dxffactory.get_layouts()
Ejemplo n.º 4
0
 def set_header_var(self, name: str, value: str) -> int:
     """Store the header variables $ACADVER, $DWGCODEPAGE and $HANDSEED.

     Args:
         name: header variable name including the leading '$'
         value: header variable value as string

     Returns:
         1 if the variable was stored, 0 if `name` is none of the three
         supported variables

     """
     if name == '$HANDSEED':
         self.handseed = value
         return 1

     if name == '$DWGCODEPAGE':
         self.encoding = toencoding(value)
         return 1

     if name == '$ACADVER':
         self.version = value
         # Versions missing in the release table are reported as 'R12'
         self.release = acad_release.get(value, 'R12')
         return 1

     return 0
Ejemplo n.º 5
0
def single_pass_modelspace(
    stream: BinaryIO,
    types: Iterable[str] = None,
    errors: str = "surrogateescape",
) -> Iterable[DXFGraphic]:
    """Yield the modelspace entities of a binary DXF stream as
    :class:`DXFGraphic` objects, reading the stream only once.

    This is meant for a 'quick' iteration over a **not** seekable stream.
    Restricting the DXF types may speed things up if many entity types are
    skipped.

    The function works in two phases: the first phase reads the raw tags of
    the first section to pick up $DWGCODEPAGE and $ACADVER, the second phase
    continues on the same stream with the detected encoding and collects
    the entities of the ENTITIES section.

    Args:
        stream: (not seekable) binary DXF stream
        types: DXF types like ``['LINE', '3DFACE']`` which should be returned,
            ``None`` returns all supported types.
        errors: decoding error handler, passed on to the tagger of the
            second phase, e.g. "surrogateescape" (default), "ignore" or
            "strict"

    Raises:
        DXFStructureError: Invalid or incomplete DXF file
        UnicodeDecodeError: if `errors` is "strict" and a decoding error occurs

    """
    encoding = "cp1252"
    version = "AC1009"
    pending: Optional[str] = None  # header variable whose value tag is next
    prev_code: int = -1
    prev_value: str = ""
    entities = False  # True while inside of the ENTITIES section
    wanted = _requested_types(types)

    # Phase 1: raw bytes tags, stops at the end of the first section
    for code, value in binary_tagger(stream):
        if code == 0 and value == b"ENDSEC":
            break
        if code == 2 and prev_code == 0 and value != b"HEADER":
            # (0, SECTION), (2, name)
            # First section is not the HEADER section
            entities = value == b"ENTITIES"
            break

        if code == 9 and value == b"$DWGCODEPAGE":
            pending = "ENCODING"
        elif code == 9 and value == b"$ACADVER":
            pending = "VERSION"
        elif pending == "ENCODING":
            encoding = toencoding(value.decode())
            pending = None
        elif pending == "VERSION":
            version = value.decode()
            pending = None
        prev_code = code

    if version >= "AC1021":
        encoding = "utf-8"

    waiting: Optional[DXFGraphic] = None
    collected: List[DXFTag] = []
    linked_entity = entity_linker()

    # Phase 2: compiled tags of the remaining stream
    for tag in tag_compiler(binary_tagger(stream, encoding, errors)):
        code, value = tag.code, tag.value

        if not entities:
            # Watch out for the begin of the ENTITIES section
            if code == 2 and prev_code == 0 and prev_value == "SECTION":
                entities = value == "ENTITIES"
            prev_code, prev_value = code, value
            continue

        # Inside of the ENTITIES section - nothing else matters
        if code != 0:
            collected.append(tag)
            continue

        if value == "ENDSEC":
            if waiting:
                yield waiting
            return

        # A structure tag finishes the entity collected so far
        if collected and collected[0].value in wanted:
            entity = cast(DXFGraphic, factory.load(ExtendedTags(collected)))
            if not linked_entity(entity) and entity.dxf.paperspace == 0:
                # Hold back one entity for collecting linked entities:
                # VERTEX, ATTRIB
                if waiting:
                    yield waiting
                waiting = entity
        collected = [tag]
Ejemplo n.º 6
0
    def _load_section_dict(self, sections: loader.SectionDict) -> None:
        """ Internal API to load a DXF document from a section dict.

        The attribute ``is_loading`` is True while this method runs and is
        reset to False right before the final setup.
        """
        self.is_loading = True

        # Header section: all header tags are the first DXF structure entity.
        # Files without a header are by default DXF R12.
        header_tags = sections.get('HEADER', [None])[0]
        self.header = (
            HeaderSection.new(dxfversion=DXF12)
            if header_tags is None
            else HeaderSection.load(header_tags)
        )

        self._dxfversion: str = self.header.get('$ACADVER', DXF12)
        # Remember the original DXF version of the loaded file
        self._loaded_dxfversion = self._dxfversion

        # Content encoding
        codepage = self.header.get('$DWGCODEPAGE', 'ANSI_1252')
        self.encoding = toencoding(codepage)

        # Handle seed
        seed: str = self.header.get('$HANDSEED', str(self.entitydb.handles))
        valid_seed = _validate_handle_seed(seed)
        self.entitydb.handles.reset(valid_seed)

        # End of the 1. loading stage: afterwards all entities of the DXF
        # file are stored in the entity database.
        loader.load_and_bind_dxf_content(sections, self)

        self.classes = ClassesSection(self, sections.get('CLASSES', None))
        self.tables = TablesSection(self, sections.get('TABLES', None))

        # Create *Model_Space and *Paper_Space BLOCK_RECORDS, the
        # BlockSection setup takes care about the rest.
        self._create_required_block_records()

        # From here on all table entries are required
        self.blocks = BlocksSection(self, sections.get('BLOCKS', None))
        self.entities = EntitySection(self, sections.get('ENTITIES', None))
        self.objects = ObjectsSection(self, sections.get('OBJECTS', None))

        # only DXF R2013+
        self.acdsdata = AcDsDataSection(self, sections.get('ACDSDATA', None))

        # Unmanaged sections are kept as raw tags
        for name, data in sections.items():
            if name in const.MANAGED_SECTIONS:
                continue
            self.stored_sections.append(StoredSection(data))

        # Objects section is not initialized!
        self._2nd_loading_stage()

        # DXF version upgrades
        if self.dxfversion < DXF12:
            logger.info('DXF version upgrade to DXF R12.')
            self.dxfversion = DXF12

        if self.dxfversion == DXF12:
            self.tables.create_table_handles()

        if self.dxfversion in (DXF13, DXF14):
            logger.info('DXF version upgrade to DXF R2000.')
            self.dxfversion = DXF2000
            self.create_all_arrow_blocks()

        # Objects section setup, creates missing management tables (DICTIONARY)
        self.rootdict = self.objects.rootdict
        self.objects.setup_objects_management_tables(self.rootdict)

        # Modelspace- and paperspace layouts
        self.layouts = Layouts.load(self)

        # The remaining work is common to the new and load process
        self.is_loading = False
        self._finalize_setup()
Ejemplo n.º 7
0
    def _load(self, tagger: Iterable['DXFTag']):
        """ Load the DXF document from a stream of DXF tags.

        Args:
            tagger: iterable of DXF tags, passed to load_dxf_structure()

        """
        # Load the complete DXF entity structure, the section
        # THUMBNAILIMAGE is discarded.
        sections = load_dxf_structure(tagger)
        sections.pop('THUMBNAILIMAGE', None)

        # Header section: all header tags are the first DXF structure entity
        header_tags = sections.get('HEADER', [None])[0]
        if header_tags is not None:
            self.header = HeaderSection.load(header_tags)
        else:
            # Files without a header are by default DXF R12
            self.header = HeaderSection.new(dxfversion=DXF12)

        # A missing $ACADVER defaults to DXF R12
        self._dxfversion = self.header.get('$ACADVER', DXF12)  # type: str
        # Remember the DXF version of the loaded file
        self._loaded_dxfversion = self._dxfversion
        codepage = self.header.get('$DWGCODEPAGE', 'ANSI_1252')
        self.encoding = toencoding(codepage)  # type: str # read/write

        # Handle seed and handle setup
        seed = self.header.get(
            '$HANDSEED', str(self.entitydb.handles))  # type: str
        self.entitydb.handles.reset(seed)

        # Store all necessary DXF entities in the drawing database, all
        # handles used in the DXF file are known afterwards.
        fill_database(sections, self.dxffactory)

        # Sections
        self.classes = ClassesSection(self, sections.get('CLASSES', None))
        self.tables = TablesSection(self, sections.get('TABLES', None))
        # Create *Model_Space and *Paper_Space BLOCK_RECORDS, the
        # BlockSection setup takes care about the rest.
        self._create_required_block_records()
        # Table records are available from here on
        self.blocks = BlocksSection(self, sections.get('BLOCKS', None))
        self.entities = EntitySection(self, sections.get('ENTITIES', None))
        self.objects = ObjectsSection(self, sections.get('OBJECTS', None))
        # only valid for DXF R2013 and later
        self.acdsdata = AcDsDataSection(self, sections.get('ACDSDATA', None))

        # Sections which are not managed are stored as they are
        for name, data in sections.items():
            if name in MANAGED_SECTIONS:
                continue
            self.stored_sections.append(StoredSection(data))

        if self.dxfversion < DXF12:
            # upgrade to DXF R12
            logger.info('Upgrading drawing to DXF R12.')
            self.dxfversion = DXF12

        # DIMSTYLE: ezdxf uses names for blocks, linetypes and text style as
        # internal data, handles are set at export.
        # Requires BLOCKS and TABLES section!
        self.tables.resolve_dimstyle_names()

        if self.dxfversion == DXF12:
            # A TABLE requires no handle and has no owner tag in DXF12, but
            # DXF R2000+ requires a TABLE with handle and each table entry
            # has an owner tag, pointing to the TABLE entry.
            self.tables.create_table_handles()

        if self.dxfversion in (DXF13, DXF14):
            # upgrade to DXF R2000
            self.dxfversion = DXF2000

        self.rootdict = self.objects.rootdict
        # create missing tables
        self.objects.setup_objects_management_tables(self.rootdict)

        self.layouts = Layouts.load(self)
        self._finalize_setup()
Ejemplo n.º 8
0
 def _get_encoding(self):
     """Return the encoding for the header variable $DWGCODEPAGE, a missing
     variable defaults to the code page 'ANSI_1252'.
     """
     return toencoding(self.header.get('$DWGCODEPAGE', 'ANSI_1252'))
Ejemplo n.º 9
0
def single_pass_modelspace(stream: BinaryIO, types: Iterable[str] = None) -> Iterable[DXFGraphic]:
    """
    Yield the modelspace entities of a binary DXF stream as :class:`DXFGraphic` objects, reading the stream only once.

    This is meant for a 'quick' iteration over a **not** seekable stream. Restricting the DXF types may speed
    things up if many entity types are skipped.

    The function works in two phases: the first phase reads the raw tags of the first section to pick up
    $DWGCODEPAGE and $ACADVER, the second phase continues on the same stream with the detected encoding and
    collects the entities of the ENTITIES section.

    Args:
        stream: (not seekable) binary DXF stream
        types: DXF types like ``['LINE', '3DFACE']`` which should be returned, ``None`` returns all supported types.

    """
    encoding = 'cp1252'
    version = 'AC1009'
    pending: Optional[str] = None  # header variable whose value tag is next
    prev_code: int = -1
    prev_value: str = ''
    entities = False  # True while inside of the ENTITIES section
    wanted = _requested_types(types)

    # Phase 1: raw bytes tags, stops at the end of the first section
    for code, value in binary_tagger(stream):
        if code == 0 and value == b'ENDSEC':
            break
        if code == 2 and prev_code == 0 and value != b'HEADER':
            # (0, SECTION), (2, name)
            # First section is not the HEADER section
            entities = (value == b'ENTITIES')
            break

        if code == 9 and value == b'$DWGCODEPAGE':
            pending = 'ENCODING'
        elif code == 9 and value == b'$ACADVER':
            pending = 'VERSION'
        elif pending == 'ENCODING':
            encoding = toencoding(value.decode())
            pending = None
        elif pending == 'VERSION':
            version = value.decode()
            pending = None
        prev_code = code

    if version >= 'AC1021':
        encoding = 'utf-8'

    waiting: Optional[DXFEntity] = None
    collected: List[DXFTag] = []
    factory = EntityFactory()
    linked_entity = entity_linker()

    # Phase 2: compiled tags of the remaining stream
    for tag in tag_compiler(binary_tagger(stream, encoding)):
        code, value = tag.code, tag.value

        if not entities:
            # Watch out for the begin of the ENTITIES section
            if code == 2 and prev_code == 0 and prev_value == 'SECTION':
                entities = (value == 'ENTITIES')
            prev_code, prev_value = code, value
            continue

        # Inside of the ENTITIES section - nothing else matters
        if code != 0:
            collected.append(tag)
            continue

        if value == 'ENDSEC':
            if waiting:
                yield waiting
            return

        # A structure tag finishes the entity collected so far
        if collected and collected[0].value in wanted:
            entity = factory.entity(ExtendedTags(collected))
            if not linked_entity(entity) and entity.dxf.paperspace == 0:
                # Hold back one entity for collecting linked entities (VERTEX, ATTRIB)
                if waiting:
                    yield waiting
                waiting = entity
        collected = [tag]
Ejemplo n.º 10
0
 def test_ansi_1250(self):
     """The code page name 'ansi_1250' has to yield the encoding 'cp1250'."""
     detected = toencoding('ansi_1250')
     self.assertEqual('cp1250', detected)
Ejemplo n.º 11
0
 def test_default(self):
     """The name 'xyz' has to yield 'cp1252' - presumably the fallback for
     unknown code page names.
     """
     detected = toencoding('xyz')
     self.assertEqual('cp1252', detected)
Ejemplo n.º 12
0
def test_ansi_1250():
    """The code page name 'ansi_1250' has to yield the encoding 'cp1250'."""
    detected = toencoding('ansi_1250')
    assert detected == 'cp1250'
Ejemplo n.º 13
0
def test_default():
    """The name 'xyz' has to yield 'cp1252' - presumably the fallback for
    unknown code page names.
    """
    detected = toencoding('xyz')
    assert detected == 'cp1252'
Ejemplo n.º 14
0
def load(filename: str) -> FileStructure:
    """
    Load DXF file structure for file `filename`, the file has to be seekable.

    The file is read tag by tag in binary mode. The file location and the
    line number of each structure tag (group code 0), section name and
    entity handle are stored as index entries. The header variables
    $ACADVER and $DWGCODEPAGE set the version and the encoding of the
    returned file structure, the encoding is forced to 'utf-8' for
    version 'AC1021' or later.

    The file is closed in any case, also if an exception is raised.

    Args:
        filename: file system file name

    Returns:
        :class:`FileStructure` including the index of the file

    Raises:
        DXFStructureError: Invalid or incomplete DXF file.

    """
    file_structure = FileStructure(filename)
    line: int = 1
    eof = False
    header = False  # True while inside of the HEADER section
    index: List[IndexEntry] = []
    prev_code: int = -1
    prev_value: bytes = b''
    structure = None  # the actual structure tag: 'SECTION', 'LINE', ...

    # The context manager closes the file also if loading fails, the former
    # explicit close() call was skipped if an exception was raised.
    with open(filename, mode='rb') as file:

        def load_tag() -> Tuple[int, bytes]:
            """ Read the next (group code, value) pair, one line each. """
            nonlocal line
            try:
                code = int(file.readline())
            except ValueError:
                raise DXFStructureError(f'Invalid group code in line {line}')

            if code < 0 or code > 1071:
                raise DXFStructureError(
                    f'Invalid group code {code} in line {line}')
            value = file.readline().rstrip(b'\r\n')
            line += 2
            return code, value

        def load_header_var() -> str:
            """ Read the value tag of a header variable as string. """
            _, value = load_tag()
            return value.decode()

        while not eof:
            # Location and line number of the tag about to be read
            location = file.tell()
            tag_line = line
            try:
                code, value = load_tag()
                if header and code == 9:
                    if value == b'$ACADVER':
                        file_structure.version = load_header_var()
                    elif value == b'$DWGCODEPAGE':
                        file_structure.encoding = toencoding(load_header_var())
                    continue
            except IOError:
                break

            if code == 0:
                # All structure tags have group code == 0, store file location
                structure = value
                index.append(IndexEntry(0, value.decode(), location, tag_line))
                eof = (value == b'EOF')

            elif code == 2 and prev_code == 0 and prev_value == b'SECTION':
                # Section name is the tag (2, name) following the (0, SECTION) tag.
                header = (value == b'HEADER')
                index.append(IndexEntry(2, value.decode(), location, tag_line))

            elif code == 5 and structure != b'DIMSTYLE':
                # Entity handles have always group code 5.
                index.append(IndexEntry(5, value.decode(), location, tag_line))

            elif code == 105 and structure == b'DIMSTYLE':
                # Except the DIMSTYLE table entry has group code 105,
                # which is stored as index entry with code 5.
                index.append(IndexEntry(5, value.decode(), location, tag_line))

            prev_code = code
            prev_value = value

    if not eof:
        raise DXFStructureError('Unexpected end of file.')

    if file_structure.version >= 'AC1021':  # R2007 and later
        file_structure.encoding = 'utf-8'
    file_structure.index = index
    return file_structure
Ejemplo n.º 15
0
def test_ansi_1250():
    """The code page name "ansi_1250" has to yield the encoding "cp1250"."""
    detected = toencoding("ansi_1250")
    assert detected == "cp1250"
Ejemplo n.º 16
0
def test_default():
    """The name "xyz" has to yield "cp1252" - presumably the fallback for
    unknown code page names.
    """
    detected = toencoding("xyz")
    assert detected == "cp1252"