def _map_array(self, offset=None, length=None, dtype=np.float64, stride=1, filename=None, array=None, name='unknown'):
    """Return column data from an in-memory array or from a region of a file.

    When ``array`` is given it is returned as-is and ``offset``, ``length``,
    ``dtype`` and ``stride`` are ignored.  Otherwise the data is read from
    ``filename`` through ``ColumnReader`` when that name is truthy, or
    through ``np.frombuffer`` on the cached memory mapping.

    :param offset: start position of the data in the file; required when
        ``array`` is not given (``np.frombuffer`` interprets it in bytes)
    :param length: number of elements in the column
    :param dtype: element type of the data in the file
    :param stride: element stride; the ``np.frombuffer`` path reads
        ``length * stride`` items and keeps every ``stride``-th one
    :param filename: file to read from, defaults to ``self.filename``
    :param array: existing array to use instead of reading from a file
    :param name: not used by this method
    :return: ``array`` itself, a ``ColumnReader`` or a numpy array
    :raises ValueError: if neither ``array`` nor ``offset`` is given
    :raises RuntimeError: if the mapping is needed while ``self.nommap`` is set
    """
    if filename is None:
        filename = self.filename
    # Looked up eagerly, so a file that was never mapped fails right here.
    mapping = None if self.nommap else self.mapping_map[filename]

    if array is not None:
        return array

    if offset is None:
        # Raise instead of exiting the interpreter (with a success status)
        # from inside library code.
        raise ValueError("offset is None: either array or offset is required")

    file = self.file_map[filename]
    if ColumnReader:
        import io
        # NOTE(review): io.FileIO takes ownership of the descriptor by
        # default (closefd=True) - confirm this cannot close the shared file.
        return ColumnReader(self, io.FileIO(file.fileno(), 'rb'), offset, length, dtype)

    if mapping is None:
        raise RuntimeError(
            "cannot read from %r: memory mapping is disabled and no ColumnReader is available" % (filename,))
    count = length if stride is None else length * stride
    column = np.frombuffer(mapping, dtype=dtype, count=count, offset=offset)
    if stride and stride != 1:
        column = column[::stride]
    return column
def _get_mapping(self, path):
    """Return the memory mapping for ``path``, creating and caching it on first use.

    The first call for a path opens the file (read/write when ``self.write``
    is set, read-only otherwise), maps it completely and stores the file
    object, its descriptor and the mapping in ``self.file_map``,
    ``self.fileno_map`` and ``self.mapping_map``.  Later calls return the
    cached mapping.

    :param path: path of the file to map
    :return: the ``mmap.mmap`` object for ``path``
    """
    assert not self.nommap
    if path not in self.mapping_map:
        file = open(path, "rb+" if self.write else "rb")
        try:
            fileno = file.fileno()
            if vaex.utils.osname == "windows":
                kwargs = {"access": mmap.ACCESS_WRITE if self.write else mmap.ACCESS_READ}
            elif self.write:
                # Request read access explicitly: a write-only mapping is not
                # guaranteed to be readable on every platform.
                kwargs = {"prot": mmap.PROT_READ | mmap.PROT_WRITE}
            else:
                kwargs = {"prot": mmap.PROT_READ}
            mapping = mmap.mmap(fileno, 0, **kwargs)
        except Exception:
            # Do not leak the open file when mapping fails (e.g. empty file).
            file.close()
            raise
        self.file_map[path] = file
        self.fileno_map[path] = fileno
        self.mapping_map[path] = mapping
    return self.mapping_map[path]
def _get_mapping(self, path):
    """Return the cached memory mapping for ``path``, mapping the file if needed.

    On a cache miss the file is opened (``"rb+"`` when ``self.write`` is set,
    ``"rb"`` otherwise) and mapped over its whole length.  The file object,
    its descriptor and the mapping are remembered in ``self.file_map``,
    ``self.fileno_map`` and ``self.mapping_map`` under ``path``.

    :param path: path of the file to map
    :return: the ``mmap.mmap`` object for ``path``
    """
    assert not self.nommap
    if path in self.mapping_map:
        return self.mapping_map[path]

    file = open(path, "rb+" if self.write else "rb")
    try:
        fileno = file.fileno()
        kwargs = {}
        if vaex.utils.osname == "windows":
            kwargs["access"] = mmap.ACCESS_WRITE if self.write else mmap.ACCESS_READ
        elif self.write:
            # Writable mappings are read back as well, so ask for both
            # protections instead of PROT_WRITE alone.
            kwargs["prot"] = mmap.PROT_READ | mmap.PROT_WRITE
        else:
            kwargs["prot"] = mmap.PROT_READ
        mapping = mmap.mmap(fileno, 0, **kwargs)
    except Exception:
        # Mapping failed (e.g. empty file): close the file before re-raising.
        file.close()
        raise
    # TODO: we can think about adding this in py38
    # mapping.madvise(mmap.MADV_SEQUENTIAL)
    self.file_map[path] = file
    self.fileno_map[path] = fileno
    self.mapping_map[path] = mapping
    return mapping
def _map_array(self, offset=None, length=None, dtype=np.float64, stride=1, filename=None, array=None, name='unknown'):
    """Return column data from an in-memory array or from a region of a file.

    When ``array`` is given it is returned as-is and ``offset``, ``length``,
    ``dtype`` and ``stride`` are ignored.  Otherwise the data is read from
    ``filename`` through ``ColumnReader`` when that name is truthy, or
    through ``np.frombuffer`` on the cached memory mapping.

    :param offset: start position of the data in the file; required when
        ``array`` is not given (``np.frombuffer`` interprets it in bytes)
    :param length: number of elements in the column
    :param dtype: element type of the data in the file
    :param stride: element stride; the ``np.frombuffer`` path reads
        ``length * stride`` items and keeps every ``stride``-th one
    :param filename: file to read from, defaults to ``self.filename``
    :param array: existing array to use instead of reading from a file
    :param name: not used by this method
    :return: ``array`` itself, a ``ColumnReader`` or a numpy array
    :raises ValueError: if neither ``array`` nor ``offset`` is given
    :raises RuntimeError: if the mapping is needed while ``self.nommap`` is set
    """
    if filename is None:
        filename = self.filename
    # Looked up eagerly, so a file that was never mapped fails right here.
    mapping = None if self.nommap else self.mapping_map[filename]

    if array is not None:
        return array

    if offset is None:
        # Raise instead of exiting the interpreter (with a success status)
        # from inside library code.
        raise ValueError("offset is None: either array or offset is required")

    file = self.file_map[filename]
    if ColumnReader:
        import io
        # NOTE(review): io.FileIO takes ownership of the descriptor by
        # default (closefd=True) - confirm this cannot close the shared file.
        return ColumnReader(self, io.FileIO(file.fileno(), 'rb'), offset, length, dtype)

    if mapping is None:
        raise RuntimeError(
            "cannot read from %r: memory mapping is disabled and no ColumnReader is available" % (filename,))
    count = length if stride is None else length * stride
    column = np.frombuffer(mapping, dtype=dtype, count=count, offset=offset)
    if stride and stride != 1:
        column = column[::stride]
    return column
def addColumn(self, name, offset=None, length=None, dtype=np.float64, stride=1, filename=None, array=None):
    """Register a column backed by an in-memory array or by a region of a file.

    The first column added fixes the expected row count
    (``self._length_original``).  A later column whose length differs is
    reported through ``logger.error`` and is not registered.

    When ``array`` is given it is used directly and the recorded offset and
    stride are ``None``.  Otherwise the data is read from ``filename``
    through ``ColumnReader`` when that name is truthy, or through
    ``np.frombuffer`` on the cached memory mapping.

    :param name: column name
    :param offset: start position of the data in the file; required when
        ``array`` is not given (``np.frombuffer`` interprets it in bytes)
    :param length: number of elements in the column
    :param dtype: element type of the data in the file
    :param stride: element stride; the ``np.frombuffer`` path reads
        ``length * stride`` items and keeps every ``stride``-th one
    :param filename: file to read from, defaults to ``self.filename``
    :param array: existing array to use instead of reading from a file
    :raises ValueError: if neither ``array`` nor ``offset`` is given
    :raises RuntimeError: if the mapping is needed while ``self.nommap`` is set
    """
    if array is None and offset is None:
        # Raise (before touching any state) instead of exiting the
        # interpreter with a success status from inside library code.
        raise ValueError(
            "offset is None: either array or offset is required to add column %r" % (name,))
    if filename is None:
        filename = self.filename
    # Looked up eagerly, so a file that was never mapped fails right here.
    mapping = None if self.nommap else self.mapping_map[filename]
    if array is not None:
        length = len(array)

    if self._length_original is not None and length != self._length_original:
        # One format string with lazy arguments; the expected value is the
        # one the comparison above actually used.
        logger.error(
            "inconsistent length: length of column %s is %s, while %s was expected",
            name, length, self._length_original)
        return

    self._length_unfiltered = length
    self._length_original = length
    if self.current_slice is None:
        self.current_slice = (0, length)
        self.fraction = 1.
        self._length = length
        self._index_end = self._length_unfiltered
        self._index_start = 0

    if array is not None:
        # In-memory columns have no file layout; record the array's own dtype.
        stride = None
        offset = None
        dtype = array.dtype
        column = array
    else:
        file = self.file_map[filename]
        if ColumnReader:
            import io
            # NOTE(review): io.FileIO takes ownership of the descriptor by
            # default (closefd=True) - confirm this cannot close the shared file.
            column = ColumnReader(self, io.FileIO(file.fileno(), 'rb'), offset, length, dtype)
        else:
            if mapping is None:
                raise RuntimeError(
                    "cannot read column %r: memory mapping is disabled and no ColumnReader is available" % (name,))
            count = length if stride is None else length * stride
            column = np.frombuffer(mapping, dtype=dtype, count=count, offset=offset)
            if stride and stride != 1:
                column = column[::stride]

    self.columns[name] = column
    self.column_names.append(name)
    self._save_assign_expression(name, Expression(self, name))
    self.all_columns[name] = column
    self.all_column_names.append(name)
    self.nColumns += 1
    self.nRows = self._length_original
    self.offsets[name] = offset
    self.strides[name] = stride
    if filename is not None:
        self.filenames[name] = os.path.abspath(filename)
    self.dtypes[name] = dtype