def test_last_row_simple(self):
    """Join two one-row constant tables and check that the last row of
    the join carries all four columns with their original values."""
    scheduler = self.scheduler()
    left = Table(name=get_random_name("cst1"),
                 data={'xmin': [1], 'xmax': [2]})
    right = Table(name=get_random_name("cst2"),
                  data={'ymin': [3], 'ymax': [4]})
    const_left = Constant(left, scheduler=scheduler)
    const_right = Constant(right, scheduler=scheduler)
    join = Join(scheduler=scheduler)
    join.input.table = const_left.output.table
    join.input.table = const_right.output.table
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input.df = join.output.table
    scheduler.start()
    scheduler.join()
    last = join.table().last()
    expected = {'xmin': 1, 'xmax': 2, 'ymin': 3, 'ymax': 4}
    self.assertTrue(all(last[key] == value
                        for key, value in expected.items()))
def __init__(self, name, index, base=None, storagegroup=None,
             dshape=None, fillvalue=None, shape=None, chunks=None,
             data=None, indices=None):
    """Create a new column.

    If ``index`` is None a fresh index and dataset are created for the
    column; otherwise the column shares the given index and, by
    default, its storage group.
    """
    super(Column, self).__init__(name, index, base=base)
    if storagegroup is None:
        if index is not None:
            storagegroup = index.storagegroup
        else:
            storagegroup = Group.default(name=get_random_name('column_'))
    self._storagegroup = storagegroup
    self.dataset = None
    self._dshape = None
    if self.index is None:
        if data is not None:
            # Check lengths before creating any storage so we fail early.
            l = len(data)
            if indices and l != len(indices):
                # BUG FIX: the message used logging-style lazy arguments,
                # which ValueError does not format; interpolate explicitly.
                raise ValueError('Bad index length (%d/%d)'
                                 % (len(indices), l))
        self._complete_column(dshape, fillvalue, shape, chunks, data)
    if data is not None:
        self.append(data, indices)
def __init__(
    self,
    name: Optional[str],
    data: Any = None,
    dshape: Optional[Union[str, DataShape]] = None,
    fillvalues: Optional[Dict[str, Any]] = None,
    storagegroup: Optional[Group] = None,
    chunks: Optional[Chunks] = None,
    create: Optional[bool] = None,
    indices: Optional[Index] = None,
):
    """Create a new Table or load an existing one from its storage group.

    When no dshape can be determined (none given, none extractable from
    ``data``) or ``create`` is falsy and the storage group already holds
    table metadata, the table is loaded; otherwise it is created and,
    when ``data`` is given, populated with it.
    """
    # pylint: disable=too-many-arguments, too-many-branches
    super(Table, self).__init__()
    # Validate keyword arguments up front, before any storage is touched.
    if not (fillvalues is None or isinstance(fillvalues, Mapping)):
        raise ValueError(
            "Invalid fillvalues (%s) should be None or a dictionary"
            % fillvalues)
    if not (chunks is None or isinstance(chunks, (integer_types, Mapping))):
        raise ValueError(
            "Invalid chunks (%s) should be None or a dictionary" % chunks)
    if data is not None:
        # Supplying data implies creation; warn if the caller explicitly
        # asked for create=False, since it is overridden below.
        if create is not None and create is not True:
            logger.warning("creating a Table with data and create=False")
        create = True
    self._chunks = chunks
    # self._nrow = 0
    self._name: str = get_random_name("table_") if name is None else name
    # TODO: attach all randomly named tables to a dedicated, common parent node
    if not (storagegroup is None or isinstance(storagegroup, Group)):
        raise ValueError(
            "Invalid storagegroup (%s) should be None or a Group"
            % storagegroup)
    if storagegroup is None:
        assert Group.default
        storagegroup = Group.default(self._name, create=create)
    if storagegroup is None:
        raise RuntimeError("Cannot get a valid default storage Group")
    self._storagegroup = storagegroup
    if dshape is None:
        if data is None:
            self._dshape = EMPTY_DSHAPE
        else:
            # Infer the dshape from the supplied data when possible.
            data = self.parse_data(data)
            self._dshape = dshape_extract(data) or EMPTY_DSHAPE
    else:
        self._dshape = dshape_create(dshape)
        assert dshape_table_check(self._dshape)
    if create and self._dshape is EMPTY_DSHAPE:
        raise ValueError("Cannot create a table without a dshape")
    if self._dshape is EMPTY_DSHAPE or (
            not create and metadata.ATTR_TABLE in self._storagegroup.attrs):
        # Reopen path: the table schema lives in the storage group metadata.
        self._load_table()
    else:
        self._create_table(fillvalues or {})
    if data is not None:
        self.append(data, indices=indices)
def __init__(self, name, data=None, dshape=None, fillvalues=None,
             storagegroup=None, chunks=None, create=None, indices=None):
    """Create a new Table or load an existing one from its storage group.

    When no dshape can be determined (none given, none extractable from
    ``data``) or ``create`` is falsy and the storage group already holds
    table metadata, the table is loaded; otherwise it is created and,
    when ``data`` is given, populated with it.
    """
    # pylint: disable=too-many-arguments, too-many-branches
    super(Table, self).__init__()
    if not (fillvalues is None or isinstance(fillvalues, Mapping)):
        raise ValueError(
            'Invalid fillvalues (%s) should be None or a dictionary'
            % fillvalues)
    if not (chunks is None or isinstance(chunks, (integer_types, Mapping))):
        raise ValueError(
            'Invalid chunks (%s) should be None or a dictionary' % chunks)
    if data is not None:
        # BUG FIX: only warn when the caller explicitly passed
        # create=False; previously create=True also triggered this
        # (misleading) warning because the condition was only
        # `create is not None`.
        if create is not None and create is not True:
            logger.warning('creating a Table with data and create=False')
        create = True
    self._chunks = chunks
    #self._nrow = 0
    self._name = get_random_name('table_') if name is None else name
    # TODO: attach all randomly named tables to a dedicated, common parent node
    if not (storagegroup is None or isinstance(storagegroup, Group)):
        raise ValueError(
            'Invalid storagegroup (%s) should be None or a Group'
            % storagegroup)
    if storagegroup is None:
        storagegroup = Group.default(self._name, create=create)
    if storagegroup is None:
        raise RuntimeError('Cannot get a valid default storage Group')
    self._storagegroup = storagegroup
    if dshape is None:
        if data is None:
            self._dshape = None
        else:
            # Infer the dshape from the supplied data when possible.
            data = self.parse_data(data)
            self._dshape = dshape_extract(data)
    else:
        self._dshape = dshape_create(dshape)
        assert dshape_table_check(self._dshape)
    if create and self._dshape is None:
        raise ValueError('Cannot create a table without a dshape')
    if self._dshape is None or (
            not create and metadata.ATTR_TABLE in self._storagegroup.attrs):
        self._load_table()
    else:
        self._create_table(fillvalues or {})
    if data is not None:
        self.append(data, indices=indices)
def __init__(self, name: Optional[str] = None,
             parent: Optional[GroupImpl] = None):
    """Create a memory-mapped storage group, registering it in its
    parent's dictionary; the on-disk state is initialized lazily."""
    group_name = get_random_name("mmapstorage_") if name is None else name
    super(MMapGroup, self).__init__(group_name, parent=parent)
    if parent is not None:
        # Refuse to shadow an existing entry in the parent.
        if group_name in parent.dict:
            raise ValueError(
                "Cannot create group {}, already exists".format(group_name))
        parent.dict[group_name] = self
    # Deferred initialization flag: filesystem setup happens later.
    self._is_init = False
def test_last_row_simple(self) -> None:
    """Join two one-row constant tables and check that the last row of
    the join carries all four columns with their original values."""
    scheduler = self.scheduler()
    table_x = Table(name=get_random_name("cst1"),
                    data={"xmin": [1], "xmax": [2]})
    table_y = Table(name=get_random_name("cst2"),
                    data={"ymin": [3], "ymax": [4]})
    const_x = Constant(table_x, scheduler=scheduler)
    const_y = Constant(table_y, scheduler=scheduler)
    join = Join(scheduler=scheduler)
    join.input[0] = const_x.output.result
    join.input[0] = const_y.output.result
    printer = Print(proc=self.terse, scheduler=scheduler)
    printer.input[0] = join.output.result
    aio.run(scheduler.start())
    last = notNone(join.table.last())
    for column, expected in (("xmin", 1), ("xmax", 2),
                             ("ymin", 3), ("ymax", 4)):
        self.assertEqual(last[column], expected)
def create_group(name=None, create=True):
    """Return a group named *name* under the 'mmap' storage engine root.

    If *name* already exists and ``create`` is true, a fresh randomized
    name is derived; if ``create`` is false the existing group is
    returned. Raises ValueError when the existing entry is not a group,
    or when ``create=False`` and the name does not exist.
    """
    root = StorageEngine.engines()['mmap']
    if name in root.dict:
        if create:
            # Name collision: derive a fresh randomized name instead.
            name = get_random_name(name[:16] + '_')
        else:
            # BUG FIX: previously the existing entry was returned without
            # checking it actually is a group (see the commented-out
            # intent that was left behind).
            existing = root.dict[name]
            if isinstance(existing, MMapGroup):
                return existing
            raise ValueError(
                "{} already exists and is not a group".format(name))
    elif create is False:
        # BUG FIX: previously a missing group was silently created even
        # though the caller asked not to create one.
        raise ValueError("group {} does not exist".format(name))
    return MMapGroup(name, parent=root)
def __init__(
    self,
    name: str,
    index: Optional[IndexTable],
    base: Optional[BaseColumn] = None,
    storagegroup: Optional[Group] = None,
    dshape: Optional[Union[DataShape, str]] = None,
    fillvalue: Optional[Any] = None,
    shape: Optional[Shape] = None,
    chunks: Optional[Chunks] = None,
    indices: Optional[Index] = None,
    data: Optional[Any] = None,
) -> None:
    """Create a new column.

    If ``index`` is None a fresh IndexTable (and dataset) is created for
    the column; otherwise the column shares the given index and, by
    default, its storage group.
    """
    indexwasnone: bool = index is None
    if index is None:
        if data is not None:
            # Check lengths before creating any storage so we fail early.
            length = len(data)
            if indices and length != len(indices):
                # BUG FIX: the message used logging-style lazy arguments,
                # which ValueError does not format; interpolate explicitly.
                raise ValueError(
                    "Bad index length (%d/%d)" % (len(indices), length)
                )
        index = IndexTable()
    super(Column, self).__init__(name, index, base=base)
    if storagegroup is None:
        if index is not None and hasattr(index, "storagegroup"):
            # i.e. isinstance(index, Table)
            storagegroup = getattr(index, "storagegroup")
            assert isinstance(storagegroup, Group)
        else:
            assert Group.default
            storagegroup = Group.default(name=get_random_name("column_"))
    self._storagegroup = storagegroup
    self.dataset: Optional[Dataset] = None
    self._dshape: DataShape = EMPTY_DSHAPE
    if isinstance(dshape, DataShape):
        self._dshape = dshape
    elif isinstance(dshape, str):
        self._dshape = dshape_create(dshape)
    if indexwasnone:
        self._complete_column(dshape, fillvalue, shape, chunks, data)
    if data is not None:
        self.append(data, indices)
def __init__(self, column, min_column=None, max_column=None,
             reset_index=False, **kwds):
    """Set up a Stats module over one input-table column.

    The output is held in a private Table with two float64 fields whose
    names default to ``_<column>_min`` / ``_<column>_max``.
    """
    self._add_slots(
        kwds, 'input_descriptors',
        [SlotDescriptor('table', type=Table, required=True)])
    super(Stats, self).__init__(table_slot='stats', **kwds)
    self._column = column
    self.default_step_size = 10000
    # Derive default output-column names from the tracked column.
    self._min_column = ('_' + str(column) + '_min'
                        if min_column is None else min_column)
    self._max_column = ('_' + str(column) + '_max'
                        if max_column is None else max_column)
    self._reset_index = reset_index
    # dshape with two float64 fields, e.g. "{_x_min: float64, _x_max: float64}"
    self.schema = ('{' + self._min_column + ': float64, '
                   + self._max_column + ': float64}')
    self._table = Table(get_random_name('stats_'), dshape=self.schema)
def create_group(name: Optional[str] = None, create: bool = True) -> Group:
    """Return a group named *name* under the 'mmap' storage engine root.

    If *name* already exists and ``create`` is true, a fresh randomized
    name is derived; if ``create`` is false the existing group is
    returned. Raises ValueError when the existing entry is not a group,
    or when ``create=False`` and the name does not exist.
    """
    root = StorageEngine.engines()["mmap"]
    assert isinstance(root, GroupImpl)
    if name not in root.dict:
        # No collision: create (unless explicitly forbidden).
        if create is False:
            raise ValueError(f"group {name} does not exist")
        return MMapGroup(name, parent=root)
    if create:
        # Name collision: derive a fresh randomized name instead.
        return MMapGroup(get_random_name(name[:16] + "_"), parent=root)
    existing = root.dict[name]
    if isinstance(existing, GroupImpl):
        return existing
    raise ValueError(
        f"Cannot create group {name}, already exists as {type(existing)}"
    )
def __init__(self, name=None, parent=None):
    """Create (or reopen) a memory-mapped storage group backed by a
    directory on disk.

    An existing directory is reopened (its attributes are read back from
    the metadata file); otherwise the directory and metadata file are
    created. The group registers itself in its parent's dictionary.
    """
    if name is None:
        name = get_random_name("mmapstorage_")
    # BUG FIX: check for a name collision in the parent *before* doing
    # any filesystem work, so a failed construction leaves no stray
    # directory or metadata file behind (previously this check came
    # last, after the directory had already been created).
    if parent is not None and name in parent.dict:
        raise ValueError('Cannot create group {}, already exists'.format(name))
    super(MMapGroup, self).__init__(name, parent=parent)
    self._directory = self.path()
    metadata = os.path.join(self._directory, METADATA_FILE)
    self._metadata = metadata
    if os.path.exists(self._directory):
        if not os.path.isdir(self._directory):
            raise OSError('Cannot create group %s' % self._directory)
        if not os.path.isfile(metadata):
            # BUG FIX: removed a stray '"' typo from the error message.
            raise ValueError('Cannot create group %s, unsuitable directory'
                             % self._directory)
        _read_attributes(self._attrs.attrs, metadata)
    else:
        os.makedirs(self._directory)  # can raise exceptions
        _write_attributes(self._attrs.attrs, metadata)
    if parent is not None:
        parent.dict[name] = self
def __init__(self, column: Union[str, int],
             min_column: Optional[str] = None,
             max_column: Optional[str] = None,
             reset_index: bool = False,
             **kwds: Any) -> None:
    """Set up a Stats module over one input-table column.

    The result is held in a Table with two float64 fields whose names
    default to ``_<column>_min`` / ``_<column>_max``.
    """
    super(Stats, self).__init__(**kwds)
    self._column = column
    self.default_step_size = 10000
    # Derive default output-column names from the tracked column.
    self._min_column: str = ("_" + str(column) + "_min"
                             if min_column is None else min_column)
    self._max_column: str = ("_" + str(column) + "_max"
                             if max_column is None else max_column)
    self._reset_index = reset_index
    # dshape with two float64 fields, e.g. "{_x_min: float64, _x_max: float64}"
    self.schema = ("{" + self._min_column + ": float64, "
                   + self._max_column + ": float64}")
    self.result = Table(get_random_name("stats_"), dshape=self.schema)