Beispiel #1
0
    def _parse_header(self, file_object):
        """
        Read the header and variable descriptors of a Stata dataset file.

        The values read are stored in ``self._header``.  The expansion
        fields that follow the descriptors are skipped, the stream position
        reached afterwards is recorded in ``self._data_location``,
        ``self._has_string_data`` is set and ``self._col_size()`` is called.

        Parameters
        ----------
        file_object : file-like
            Binary stream supporting ``read`` and ``tell``; reading starts
            at its current position.  It is kept as ``self._file``.

        Raises
        ------
        ValueError
            If the format byte is not 113, 114 or 115.
        """
        self._file = file_object
        encoding = self._encoding
        read = self._file.read

        # parse headers
        ds_format = self._header["ds_format"] = unpack("b", read(1))[0]
        if ds_format not in (113, 114, 115):
            raise ValueError(
                "Only file formats >= 113 (Stata >= 9)"
                " are supported.  Got format %s.  Please report "
                "if you think this error is incorrect." % ds_format
            )

        # A flag of 0x1 selects big-endian; any other value is read as
        # little-endian.
        byteorder = ">" if unpack("b", read(1))[0] == 0x1 else "<"
        self._header["byteorder"] = byteorder
        self._header["filetype"] = unpack("b", read(1))[0]
        read(1)  # one byte is read and discarded
        nvar = self._header["nvar"] = unpack(byteorder + "h", read(2))[0]
        self._header["nobs"] = unpack(byteorder + "i", read(4))[0]

        def read_string(width):
            """Read a fixed-width field and decode it up to its terminator."""
            return self._null_terminate(read(width), encoding)

        def read_string_per_variable(width):
            """Read one fixed-width string field for each of the nvar variables."""
            return [read_string(width) for _ in range(nvar)]

        self._header["data_label"] = read_string(81)
        self._header["time_stamp"] = read_string(18)

        # parse descriptors
        typlist = [ord(read(1)) for _ in range(nvar)]
        self._header["typlist"] = [self.TYPE_MAP[typ] for typ in typlist]
        self._header["dtyplist"] = [self.DTYPE_MAP[typ] for typ in typlist]
        self._header["varlist"] = read_string_per_variable(33)
        # nvar + 1 shorts are stored; the last one is dropped.
        self._header["srtlist"] = unpack(
            byteorder + ("h" * (nvar + 1)), read(2 * (nvar + 1))
        )[:-1]
        # Display formats are 12 bytes wide up to format 113, 49 bytes after.
        fmt_width = 12 if ds_format <= 113 else 49
        self._header["fmtlist"] = read_string_per_variable(fmt_width)
        self._header["lbllist"] = read_string_per_variable(33)
        self._header["vlblist"] = read_string_per_variable(81)

        # ignore expansion fields
        # When reading, read five bytes; the last four bytes now tell you the
        # size of the next read, which you discard.  You then continue like
        # this until you read 5 bytes of zeros.
        while True:
            data_type = unpack(byteorder + "b", read(1))[0]
            data_len = unpack(byteorder + "i", read(4))[0]
            if data_type == 0:
                break
            read(data_len)

        # other state vars
        self._data_location = self._file.tell()
        # True when at least one mapped type is an int
        # (presumably a string column width -- confirm against TYPE_MAP).
        self._has_string_data = any(
            isinstance(typ, int) for typ in self._header["typlist"]
        )
        self._col_size()
Beispiel #2
0
    def in_domain(self, xs, ys, x):
        """
        Keep only the (xs, ys) pairs that fall inside the kernel domain
        once centred on x and scaled by ``self.h``.

        If ``self.domain`` is None the inputs are handed back untouched.
        Otherwise the surviving pairs are unzipped with ``lzip`` and
        returned as ``(xs, ys)``; when nothing survives, ``([], [])`` is
        returned.
        """
        if self.domain is None:
            return (xs, ys)

        # Disable black-list functions: filter used for speed instead of
        # list-comprehension
        # pylint: disable-msg=W0141
        def _within_domain(point):
            """Predicate for filter: is the scaled offset inside the domain?"""
            scaled = (point[0] - x) / self.h
            return scaled >= self.domain[0] and scaled <= self.domain[1]

        kept = lfilter(_within_domain, lzip(xs, ys))
        if len(kept) == 0:
            return ([], [])

        kept_xs, kept_ys = lzip(*kept)
        return (kept_xs, kept_ys)
Beispiel #3
0
    def in_domain(self, xs, ys, x):
        """
        Filter (xs, ys) down to the points lying in the kernel domain
        centred on x.

        With ``self.domain`` set to None, ``(xs, ys)`` is returned as
        given.  Otherwise each point's offset from x is divided by
        ``self.h`` and compared against the two domain bounds; the points
        that pass are unzipped with ``lzip``, and ``([], [])`` is returned
        when none pass.
        """
        if self.domain is None:
            return (xs, ys)

        # Disable black-list functions: filter used for speed instead of
        # list-comprehension
        # pylint: disable-msg=W0141
        def _point_in_range(pair):
            """Filter predicate: True when the pair's x lies in the domain."""
            offset = (pair[0] - x) / self.h
            return offset >= self.domain[0] and offset <= self.domain[1]

        survivors = lfilter(_point_in_range, lzip(xs, ys))
        if len(survivors) == 0:
            return ([], [])

        inside_xs, inside_ys = lzip(*survivors)
        return (inside_xs, inside_ys)
    def _parse_header(self, file_object):
        """
        Read the header and variable descriptors of a Stata dataset file.

        The values read are stored in ``self._header``.  The expansion
        fields that follow the descriptors are skipped, the stream position
        reached afterwards is recorded in ``self._data_location``,
        ``self._has_string_data`` is set and ``self._col_size()`` is called.

        Parameters
        ----------
        file_object : file-like
            Binary stream supporting ``read`` and ``tell``; reading starts
            at its current position.  It is kept as ``self._file``.

        Raises
        ------
        ValueError
            If the format byte is not 113, 114 or 115.
        """
        self._file = file_object
        encoding = self._encoding
        read = self._file.read

        # parse headers
        ds_format = self._header['ds_format'] = unpack('b', read(1))[0]
        if ds_format not in (113, 114, 115):
            raise ValueError("Only file formats >= 113 (Stata >= 9)"
                             " are supported.  Got format %s.  Please report "
                             "if you think this error is incorrect." %
                             ds_format)

        # A flag of 0x1 selects big-endian; any other value is read as
        # little-endian.
        byteorder = '>' if unpack('b', read(1))[0] == 0x1 else '<'
        self._header['byteorder'] = byteorder
        self._header['filetype'] = unpack('b', read(1))[0]
        read(1)  # one byte is read and discarded
        nvar = self._header['nvar'] = unpack(byteorder + 'h', read(2))[0]
        self._header['nobs'] = unpack(byteorder + 'i', read(4))[0]

        def read_string(width):
            """Read a fixed-width field and decode it up to its terminator."""
            return self._null_terminate(read(width), encoding)

        def read_string_per_variable(width):
            """Read one fixed-width string field for each of the nvar variables."""
            return [read_string(width) for _ in range(nvar)]

        self._header['data_label'] = read_string(81)
        self._header['time_stamp'] = read_string(18)

        # parse descriptors
        typlist = [ord(read(1)) for _ in range(nvar)]
        self._header['typlist'] = [self.TYPE_MAP[typ] for typ in typlist]
        self._header['dtyplist'] = [self.DTYPE_MAP[typ] for typ in typlist]
        self._header['varlist'] = read_string_per_variable(33)
        # nvar + 1 shorts are stored; the last one is dropped.
        self._header['srtlist'] = unpack(byteorder + ('h' * (nvar + 1)),
                                         read(2 * (nvar + 1)))[:-1]
        # Display formats are 12 bytes wide up to format 113, 49 bytes after.
        fmt_width = 12 if ds_format <= 113 else 49
        self._header['fmtlist'] = read_string_per_variable(fmt_width)
        self._header['lbllist'] = read_string_per_variable(33)
        self._header['vlblist'] = read_string_per_variable(81)

        # ignore expansion fields
        # When reading, read five bytes; the last four bytes now tell you the
        # size of the next read, which you discard.  You then continue like
        # this until you read 5 bytes of zeros.
        while True:
            data_type = unpack(byteorder + 'b', read(1))[0]
            data_len = unpack(byteorder + 'i', read(4))[0]
            if data_type == 0:
                break
            read(data_len)

        # other state vars
        self._data_location = self._file.tell()
        # True when at least one mapped type is an int
        # (presumably a string column width -- confirm against TYPE_MAP).
        self._has_string_data = any(
            isinstance(typ, int) for typ in self._header['typlist'])
        self._col_size()