def _parse_header(self, file_object):
    """
    Parse the header and descriptor sections of a Stata dataset stream.

    The stream is consumed strictly front to back; every ``read`` below
    advances the file position, so the order of the statements matters.

    Parameters
    ----------
    file_object : binary file-like object
        Must provide ``read(n)`` and ``tell()``. It is stored on
        ``self._file``.

    Raises
    ------
    ValueError
        If the first byte (the format version) is not 113, 114 or 115.

    Notes
    -----
    Populates ``self._header`` with the keys ``ds_format``, ``byteorder``,
    ``filetype``, ``nvar``, ``nobs``, ``data_label``, ``time_stamp``,
    ``typlist``, ``dtyplist``, ``varlist``, ``srtlist``, ``fmtlist``,
    ``lbllist`` and ``vlblist``; sets ``self._data_location`` and
    ``self._has_string_data``; finally calls ``self._col_size()``.
    """
    self._file = file_object
    stream = self._file
    encoding = self._encoding
    header = self._header

    # ---- fixed-size header ------------------------------------------------
    ds_format = header["ds_format"] = unpack("b", stream.read(1))[0]
    if ds_format not in (113, 114, 115):
        raise ValueError(
            "Only file formats >= 113 (Stata >= 9)"
            " are supported. Got format %s. Please report "
            "if you think this error is incorrect." % ds_format
        )
    # A byte-order flag of 0x1 selects the big-endian struct prefix (">");
    # any other value selects little-endian ("<").
    byteorder = header["byteorder"] = (
        ">" if unpack("b", stream.read(1))[0] == 0x1 else "<"
    )
    header["filetype"] = unpack("b", stream.read(1))[0]
    stream.read(1)  # one byte is read and discarded
    nvar = header["nvar"] = unpack(byteorder + "h", stream.read(2))[0]
    header["nobs"] = unpack(byteorder + "i", stream.read(4))[0]
    header["data_label"] = self._null_terminate(stream.read(81), encoding)
    header["time_stamp"] = self._null_terminate(stream.read(18), encoding)

    def read_strings(width):
        # Read one `width`-byte field per variable and pass each through
        # self._null_terminate.
        return [
            self._null_terminate(stream.read(width), encoding)
            for _ in range(nvar)
        ]

    # ---- descriptors ------------------------------------------------------
    type_codes = [ord(stream.read(1)) for _ in range(nvar)]
    header["typlist"] = [self.TYPE_MAP[code] for code in type_codes]
    header["dtyplist"] = [self.DTYPE_MAP[code] for code in type_codes]
    header["varlist"] = read_strings(33)
    # nvar + 1 shorts are stored; the last one is dropped.
    header["srtlist"] = unpack(
        byteorder + "h" * (nvar + 1), stream.read(2 * (nvar + 1))
    )[:-1]
    # Format strings are 12 bytes wide up to format 113, 49 bytes afterwards.
    header["fmtlist"] = read_strings(12 if ds_format <= 113 else 49)
    header["lbllist"] = read_strings(33)
    header["vlblist"] = read_strings(81)

    # ---- expansion fields (ignored) ---------------------------------------
    # Read five bytes at a time: a 1-byte type and a 4-byte length. The
    # length gives the size of the payload, which is read and discarded.
    # Stop at the first record whose type byte is zero.
    while True:
        data_type = unpack(byteorder + "b", stream.read(1))[0]
        data_len = unpack(byteorder + "i", stream.read(4))[0]
        if data_type == 0:
            break
        stream.read(data_len)

    # ---- other state ------------------------------------------------------
    self._data_location = stream.tell()
    # True when at least one typlist entry is an int (presumably the
    # entries TYPE_MAP uses for string columns -- confirm against TYPE_MAP).
    self._has_string_data = any(
        isinstance(entry, int) for entry in header["typlist"]
    )
    self._col_size()
def in_domain(self, xs, ys, x):
    """
    Keep only the (xs, ys) pairs that fall inside the kernel domain around x.

    A pair is kept when ``u = (xs[i] - x) / self.h`` satisfies
    ``self.domain[0] <= u <= self.domain[1]``.

    Returns
    -------
    tuple
        ``(xs, ys)`` exactly as passed in when ``self.domain`` is None;
        otherwise a pair of tuples with the retained values, or
        ``([], [])`` when no pair is retained.
    """
    if self.domain is None:
        return (xs, ys)

    # filter (via lfilter) is used on purpose for speed instead of a
    # list comprehension, hence the black-listed-function warning is off.
    # pylint: disable-msg=W0141
    def within_domain(pair):
        """Predicate for lfilter: is this pair's x inside the domain?"""
        scaled = (pair[0] - x) / self.h
        return scaled >= self.domain[0] and scaled <= self.domain[1]

    kept = lfilter(within_domain, lzip(xs, ys))
    if not len(kept) > 0:
        return ([], [])

    kept_xs, kept_ys = lzip(*kept)
    return (kept_xs, kept_ys)
def _parse_header(self, file_object):
    """
    Parse the header and descriptor sections of a Stata dataset stream.

    The stream is consumed strictly front to back; every ``read`` below
    advances the file position, so the order of the statements matters.

    Parameters
    ----------
    file_object : binary file-like object
        Must provide ``read(n)`` and ``tell()``. It is stored on
        ``self._file``.

    Raises
    ------
    ValueError
        If the first byte (the format version) is not 113, 114 or 115.

    Notes
    -----
    Populates ``self._header`` with the keys ``ds_format``, ``byteorder``,
    ``filetype``, ``nvar``, ``nobs``, ``data_label``, ``time_stamp``,
    ``typlist``, ``dtyplist``, ``varlist``, ``srtlist``, ``fmtlist``,
    ``lbllist`` and ``vlblist``; sets ``self._data_location`` and
    ``self._has_string_data``; finally calls ``self._col_size()``.
    """
    self._file = file_object
    stream = self._file
    encoding = self._encoding
    header = self._header

    # ---- fixed-size header ------------------------------------------------
    ds_format = header['ds_format'] = unpack('b', stream.read(1))[0]
    if ds_format not in (113, 114, 115):
        raise ValueError("Only file formats >= 113 (Stata >= 9)"
                         " are supported. Got format %s. Please report "
                         "if you think this error is incorrect." % ds_format)
    # A byte-order flag of 0x1 selects the big-endian struct prefix ('>');
    # any other value selects little-endian ('<').
    byteorder = header['byteorder'] = (
        '>' if unpack('b', stream.read(1))[0] == 0x1 else '<'
    )
    header['filetype'] = unpack('b', stream.read(1))[0]
    stream.read(1)  # one byte is read and discarded
    nvar = header['nvar'] = unpack(byteorder + 'h', stream.read(2))[0]
    header['nobs'] = unpack(byteorder + 'i', stream.read(4))[0]
    header['data_label'] = self._null_terminate(stream.read(81), encoding)
    header['time_stamp'] = self._null_terminate(stream.read(18), encoding)

    def read_strings(width):
        # Read one `width`-byte field per variable and pass each through
        # self._null_terminate.
        return [
            self._null_terminate(stream.read(width), encoding)
            for _ in range(nvar)
        ]

    # ---- descriptors ------------------------------------------------------
    type_codes = [ord(stream.read(1)) for _ in range(nvar)]
    header['typlist'] = [self.TYPE_MAP[code] for code in type_codes]
    header['dtyplist'] = [self.DTYPE_MAP[code] for code in type_codes]
    header['varlist'] = read_strings(33)
    # nvar + 1 shorts are stored; the last one is dropped.
    header['srtlist'] = unpack(byteorder + 'h' * (nvar + 1),
                               stream.read(2 * (nvar + 1)))[:-1]
    # Format strings are 12 bytes wide up to format 113, 49 bytes afterwards.
    header['fmtlist'] = read_strings(12 if ds_format <= 113 else 49)
    header['lbllist'] = read_strings(33)
    header['vlblist'] = read_strings(81)

    # ---- expansion fields (ignored) ---------------------------------------
    # Read five bytes at a time: a 1-byte type and a 4-byte length. The
    # length gives the size of the payload, which is read and discarded.
    # Stop at the first record whose type byte is zero.
    while True:
        data_type = unpack(byteorder + 'b', stream.read(1))[0]
        data_len = unpack(byteorder + 'i', stream.read(4))[0]
        if data_type == 0:
            break
        stream.read(data_len)

    # ---- other state ------------------------------------------------------
    self._data_location = stream.tell()
    # True when at least one typlist entry is an int (presumably the
    # entries TYPE_MAP uses for string columns -- confirm against TYPE_MAP).
    self._has_string_data = any(
        isinstance(entry, int) for entry in header['typlist']
    )
    self._col_size()