Esempio n. 1
0
    def _map_array(self,
                   offset=None,
                   length=None,
                   dtype=np.float64,
                   stride=1,
                   filename=None,
                   array=None,
                   name='unknown'):
        """Return a column backed by ``array`` or by a region of a file.

        When ``array`` is given it is returned unchanged. Otherwise the column
        is read from ``filename`` (default ``self.filename``): through
        ``ColumnReader`` when that name is truthy, else as a NumPy view on the
        file's memory map.

        :param offset: start position of the data (a byte offset for the
            memory-mapped path); required when ``array`` is None
        :param length: number of elements in the column
        :param dtype: NumPy dtype of the elements
        :param stride: element step; the memory-mapped path reads
            ``length * stride`` items and keeps every ``stride``-th one
        :param filename: key into ``self.file_map`` / ``self.mapping_map``
        :param array: existing array to use instead of reading from a file
        :param name: column name; currently unused
        :return: ``array`` itself, a ``ColumnReader`` or a NumPy array
        :raises ValueError: if neither ``array`` nor ``offset`` is given
        :raises RuntimeError: if the memory map is needed while
            ``self.nommap`` is set
        """
        if filename is None:
            filename = self.filename
        # Looked up eagerly so an unknown filename fails here, even when
        # ``array`` is supplied.
        mapping = None if self.nommap else self.mapping_map[filename]

        if array is not None:
            return array

        if offset is None:
            # Raise instead of terminating the whole process with exit code 0.
            raise ValueError("offset is None")

        file = self.file_map[filename]
        if ColumnReader:
            import io
            # NOTE(review): FileIO wraps the shared descriptor with the default
            # closefd=True, so closing it also closes the cached file's
            # descriptor - confirm ColumnReader keeps it open.
            return ColumnReader(self, io.FileIO(file.fileno(), 'rb'),
                                offset, length, dtype)

        if mapping is None:
            raise RuntimeError(
                "cannot map %r: memory mapping is disabled (nommap) and no "
                "ColumnReader is available" % (filename,))
        column = np.frombuffer(
            mapping,
            dtype=dtype,
            count=length if stride is None else length * stride,
            offset=offset)
        if stride and stride != 1:
            column = column[::stride]
        return column
Esempio n. 2
0
 def _get_mapping(self, path):
     """Return the memory map for ``path``, creating and caching it on first use.

     On a cache miss the file is opened ("rb+" when ``self.write`` is set,
     else "rb") and mapped in full. The file object, its descriptor and the
     mapping are stored in ``self.file_map``, ``self.fileno_map`` and
     ``self.mapping_map`` under ``path``.

     :param path: path of the file to map; also the cache key
     :return: the ``mmap.mmap`` object for ``path``
     :raises AssertionError: if ``self.nommap`` is set
     """
     assert not self.nommap
     if path not in self.mapping_map:
         handle = open(path, "rb+" if self.write else "rb")
         try:
             fileno = handle.fileno()
             # ``access`` is accepted on both Unix and Windows; ACCESS_WRITE
             # gives a readable *and* writable mapping, which a bare
             # PROT_WRITE does not guarantee.
             access = mmap.ACCESS_WRITE if self.write else mmap.ACCESS_READ
             mapping = mmap.mmap(fileno, 0, access=access)
         except Exception:
             # Do not leak the descriptor when mapping fails (e.g. empty file).
             handle.close()
             raise
         self.file_map[path] = handle
         self.fileno_map[path] = fileno
         self.mapping_map[path] = mapping
     return self.mapping_map[path]
Esempio n. 3
0
 def _get_mapping(self, path):
     """Return the memory map for ``path``, creating and caching it on first use.

     On a cache miss the file is opened ("rb+" when ``self.write`` is set,
     else "rb") and mapped in full. The file object, its descriptor and the
     mapping are stored in ``self.file_map``, ``self.fileno_map`` and
     ``self.mapping_map`` under ``path``.

     :param path: path of the file to map; also the cache key
     :return: the ``mmap.mmap`` object for ``path``
     :raises AssertionError: if ``self.nommap`` is set
     """
     assert not self.nommap
     if path not in self.mapping_map:
         handle = open(path, "rb+" if self.write else "rb")
         try:
             fileno = handle.fileno()
             # ``access`` is accepted on both Unix and Windows; ACCESS_WRITE
             # gives a readable *and* writable mapping, which a bare
             # PROT_WRITE does not guarantee.
             access = mmap.ACCESS_WRITE if self.write else mmap.ACCESS_READ
             mapping = mmap.mmap(fileno, 0, access=access)
         except Exception:
             # Do not leak the descriptor when mapping fails (e.g. empty file).
             handle.close()
             raise
         # TODO: we can think about adding this in py38
         # mapping.madvise(mmap.MADV_SEQUENTIAL)
         self.file_map[path] = handle
         self.fileno_map[path] = fileno
         self.mapping_map[path] = mapping
     return self.mapping_map[path]
Esempio n. 4
0
    def _map_array(self, offset=None, length=None, dtype=np.float64, stride=1, filename=None, array=None, name='unknown'):
        """Return a column backed by ``array`` or by a region of a file.

        When ``array`` is given it is returned unchanged. Otherwise the column
        is read from ``filename`` (default ``self.filename``): through
        ``ColumnReader`` when that name is truthy, else as a NumPy view on the
        file's memory map.

        :param offset: start position of the data (a byte offset for the
            memory-mapped path); required when ``array`` is None
        :param length: number of elements in the column
        :param dtype: NumPy dtype of the elements
        :param stride: element step; the memory-mapped path reads
            ``length * stride`` items and keeps every ``stride``-th one
        :param filename: key into ``self.file_map`` / ``self.mapping_map``
        :param array: existing array to use instead of reading from a file
        :param name: column name; currently unused
        :return: ``array`` itself, a ``ColumnReader`` or a NumPy array
        :raises ValueError: if neither ``array`` nor ``offset`` is given
        :raises RuntimeError: if the memory map is needed while
            ``self.nommap`` is set
        """
        if filename is None:
            filename = self.filename
        # Looked up eagerly so an unknown filename fails here, even when
        # ``array`` is supplied.
        mapping = None if self.nommap else self.mapping_map[filename]

        if array is not None:
            return array

        if offset is None:
            # Raise instead of terminating the whole process with exit code 0.
            raise ValueError("offset is None")

        file = self.file_map[filename]
        if ColumnReader:
            import io
            # NOTE(review): FileIO wraps the shared descriptor with the default
            # closefd=True, so closing it also closes the cached file's
            # descriptor - confirm ColumnReader keeps it open.
            return ColumnReader(self, io.FileIO(file.fileno(), 'rb'), offset, length, dtype)

        if mapping is None:
            raise RuntimeError(
                "cannot map %r: memory mapping is disabled (nommap) and no "
                "ColumnReader is available" % (filename,))
        column = np.frombuffer(mapping, dtype=dtype, count=length if stride is None else length * stride, offset=offset)
        if stride and stride != 1:
            column = column[::stride]
        return column
Esempio n. 5
0
    def addColumn(self,
                  name,
                  offset=None,
                  length=None,
                  dtype=np.float64,
                  stride=1,
                  filename=None,
                  array=None):
        """Register column ``name``, backed by ``array`` or by a file region.

        If a column length was recorded before (``self._length_original``) and
        this column's length differs, an error is logged and nothing is added.
        Otherwise the length bookkeeping is updated, the column data is
        obtained (``array`` as-is, a ``ColumnReader`` when that name is truthy,
        else a NumPy view on the file's memory map) and the column is recorded
        in the dataset's per-column tables.

        :param name: column name used as key in the per-column tables
        :param offset: start position of the data (a byte offset for the
            memory-mapped path); required when ``array`` is None
        :param length: number of elements; replaced by ``len(array)`` when
            ``array`` is given
        :param dtype: NumPy dtype; replaced by ``array.dtype`` when ``array``
            is given
        :param stride: element step; the memory-mapped path reads
            ``length * stride`` items and keeps every ``stride``-th one
        :param filename: key into ``self.file_map`` / ``self.mapping_map``;
            defaults to ``self.filename``
        :param array: existing array to use instead of reading from a file
        :raises ValueError: if neither ``array`` nor ``offset`` is given
        :raises RuntimeError: if the memory map is needed while
            ``self.nommap`` is set
        """
        if filename is None:
            filename = self.filename
        # Looked up eagerly so an unknown filename fails here, even when
        # ``array`` is supplied.
        mapping = None if self.nommap else self.mapping_map[filename]

        if array is not None:
            length = len(array)

        if self._length_original is not None and length != self._length_original:
            # One format string plus lazy arguments: the detail text used to be
            # passed as a %-argument to a message without placeholders, so the
            # record could not be formatted (assuming a standard logging
            # logger). The expected value is the one actually compared against.
            logger.error(
                "inconsistent length: length of column %s is %s, while %s was expected",
                name, length, self._length_original)
            return

        if array is None and offset is None:
            # Checked before any state is touched; raise instead of
            # terminating the whole process with exit code 0.
            raise ValueError("offset is None")

        self._length_unfiltered = length
        self._length_original = length
        if self.current_slice is None:
            self.current_slice = (0, length)
            self.fraction = 1.
            self._length = length
            self._index_end = self._length_unfiltered
            self._index_start = 0

        if array is not None:
            # In-memory columns have no file layout; the values recorded
            # below reflect that.
            stride = None
            offset = None
            dtype = array.dtype
            column = array
        else:
            file = self.file_map[filename]
            if ColumnReader:
                import io
                # NOTE(review): FileIO wraps the shared descriptor with the
                # default closefd=True, so closing it also closes the cached
                # file's descriptor - confirm ColumnReader keeps it open.
                column = ColumnReader(self, io.FileIO(file.fileno(), 'rb'),
                                      offset, length, dtype)
            else:
                if mapping is None:
                    raise RuntimeError(
                        "cannot map %r: memory mapping is disabled (nommap) "
                        "and no ColumnReader is available" % (filename,))
                column = np.frombuffer(
                    mapping,
                    dtype=dtype,
                    count=length if stride is None else length * stride,
                    offset=offset)
                if stride and stride != 1:
                    column = column[::stride]

        self.columns[name] = column
        self.column_names.append(name)
        self._save_assign_expression(name, Expression(self, name))
        self.all_columns[name] = column
        self.all_column_names.append(name)
        self.nColumns += 1
        self.nRows = self._length_original
        self.offsets[name] = offset
        self.strides[name] = stride
        if filename is not None:
            self.filenames[name] = os.path.abspath(filename)
        self.dtypes[name] = dtype