def __init__(self, name, index, base=None, storagegroup=None, dshape=None, fillvalue=None, shape=None, chunks=None, data=None, indices=None):
    """Create a new column.

    If ``index`` is None (and so ``self.index`` is None after the base
    init), a new index and dataset are created via ``_complete_column``;
    otherwise the column attaches to the existing index's storage.

    :param name: column name.
    :param index: owning index table, or None to create one.
    :param base: optional base column forwarded to the superclass.
    :param storagegroup: storage Group; defaults to the index's group,
        or to a fresh default Group when there is no index.
    :param dshape/fillvalue/shape/chunks: dataset creation parameters,
        forwarded to ``_complete_column``.
    :param data: optional initial data appended after creation.
    :param indices: optional indices matching ``data``.
    :raises ValueError: when ``indices`` and ``data`` lengths disagree.
    """
    super(Column, self).__init__(name, index, base=base)
    if storagegroup is None:
        if index is not None:
            storagegroup = index.storagegroup
        else:
            storagegroup = Group.default(name=get_random_name('column_'))
    self._storagegroup = storagegroup
    self.dataset = None
    self._dshape = None
    if self.index is None:
        if data is not None:
            # Validate lengths before creating any storage.
            length = len(data)
            if indices and length != len(indices):
                # Bug fix: the message was passed printf-style to
                # ValueError and never interpolated; format it explicitly.
                raise ValueError(
                    'Bad index length (%d/%d)' % (len(indices), length))
        self._complete_column(dshape, fillvalue, shape, chunks, data)
    if data is not None:
        self.append(data, indices)
def __init__(
    self,
    name: Optional[str],
    data: Any = None,
    dshape: Optional[Union[str, DataShape]] = None,
    fillvalues: Optional[Dict[str, Any]] = None,
    storagegroup: Optional[Group] = None,
    chunks: Optional[Chunks] = None,
    create: Optional[bool] = None,
    indices: Optional[Index] = None,
):
    # pylint: disable=too-many-arguments, too-many-branches
    """Construct a Table backed by a storage Group.

    The table is either loaded from existing storage metadata or
    freshly created, depending on ``create`` and on whether a dshape
    can be determined (given explicitly or extracted from ``data``).
    """
    super(Table, self).__init__()
    if fillvalues is not None and not isinstance(fillvalues, Mapping):
        raise ValueError(
            "Invalid fillvalues (%s) should be None or a dictionary" % fillvalues)
    if chunks is not None and not isinstance(chunks, (integer_types, Mapping)):
        raise ValueError(
            "Invalid chunks (%s) should be None or a dictionary" % chunks)
    if data is not None:
        # Passing data implies creation; warn only when the caller
        # explicitly asked for create=False at the same time.
        if create is not None and create is not True:
            logger.warning("creating a Table with data and create=False")
        create = True
    self._chunks = chunks
    # self._nrow = 0
    self._name: str = name if name is not None else get_random_name("table_")
    # TODO: attach all randomly named tables to a dedicated, common parent node
    if storagegroup is not None and not isinstance(storagegroup, Group):
        raise ValueError(
            "Invalid storagegroup (%s) should be None or a Group" % storagegroup)
    if storagegroup is None:
        assert Group.default
        storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError("Cannot get a valid default storage Group")
    self._storagegroup = storagegroup
    if dshape is not None:
        # Explicit dshape: parse and sanity-check it.
        self._dshape = dshape_create(dshape)
        assert dshape_table_check(self._dshape)
    elif data is not None:
        # Infer the dshape from the provided data.
        data = self.parse_data(data)
        self._dshape = dshape_extract(data) or EMPTY_DSHAPE
    else:
        self._dshape = EMPTY_DSHAPE
    if create and self._dshape is EMPTY_DSHAPE:
        raise ValueError("Cannot create a table without a dshape")
    if self._dshape is EMPTY_DSHAPE or (
        not create and metadata.ATTR_TABLE in self._storagegroup.attrs
    ):
        self._load_table()
    else:
        self._create_table(fillvalues or {})
    if data is not None:
        self.append(data, indices=indices)
def __init__(self, name, data=None, dshape=None, fillvalues=None, storagegroup=None, chunks=None, create=None, indices=None):
    # pylint: disable=too-many-arguments, too-many-branches
    """Create a Table backed by a storage Group.

    The table is loaded from existing storage metadata or freshly
    created, depending on ``create`` and on whether a dshape is known
    (given explicitly or extracted from ``data``).

    :raises ValueError: on invalid ``fillvalues``/``chunks``/
        ``storagegroup`` arguments, or when creation is requested
        without any dshape.
    :raises RuntimeError: when no default storage Group is available.
    """
    super(Table, self).__init__()
    if not (fillvalues is None or isinstance(fillvalues, Mapping)):
        raise ValueError(
            'Invalid fillvalues (%s) should be None or a dictionary'
            % fillvalues)
    if not (chunks is None or isinstance(chunks, (integer_types, Mapping))):
        raise ValueError(
            'Invalid chunks (%s) should be None or a dictionary' % chunks)
    if data is not None:
        # Bug fix: the warning previously fired whenever create was not
        # None, including the legitimate explicit create=True case.
        # Warn only when the caller passed data together with an
        # explicit create=False (mirrors the typed implementation).
        if create is not None and create is not True:
            logger.warning('creating a Table with data and create=False')
        create = True
    self._chunks = chunks
    # self._nrow = 0
    self._name = get_random_name('table_') if name is None else name
    # TODO: attach all randomly named tables to a dedicated, common parent node
    if not (storagegroup is None or isinstance(storagegroup, Group)):
        raise ValueError(
            'Invalid storagegroup (%s) should be None or a Group'
            % storagegroup)
    if storagegroup is None:
        storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError('Cannot get a valid default storage Group')
    self._storagegroup = storagegroup
    if dshape is None:
        if data is None:
            self._dshape = None
        else:
            # Infer the dshape from the provided data.
            data = self.parse_data(data)
            self._dshape = dshape_extract(data)
    else:
        self._dshape = dshape_create(dshape)
        assert dshape_table_check(self._dshape)
    if create and self._dshape is None:
        raise ValueError('Cannot create a table without a dshape')
    if self._dshape is None or (not create and
                                metadata.ATTR_TABLE in self._storagegroup.attrs):
        self._load_table()
    else:
        self._create_table(fillvalues or {})
    if data is not None:
        self.append(data, indices=indices)
def _create_table(tname: str, columns: Parameters) -> Table:
    """Build an internal Table named *tname* from (name, dtype, value) triples."""
    fields = []
    data = {}
    for (col_name, col_dtype, col_val) in columns:
        fields.append("%s: %s" % (col_name, dshape_from_dtype(col_dtype)))
        data[col_name] = col_val
    # Assemble a record dshape such as "{a: int32,b: float64}".
    dshape = "{" + ",".join(fields) + "}"
    assert Group.default_internal
    table = Table(tname, dshape=dshape, storagegroup=Group.default_internal(tname))
    table.add(data)
    return table
def _create_table(tname, columns):
    """Build an internal Table named *tname* from (name, dtype, value) triples."""
    specs = []
    data = {}
    for (cname, ctype, cval) in columns:
        specs.append('%s: %s' % (cname, dshape_from_dtype(ctype)))
        data[cname] = cval
    # Assemble a record dshape such as "{a: int32,b: float64}".
    dshape = '{' + ','.join(specs) + '}'
    table = Table(tname, dshape=dshape,
                  storagegroup=Group.default_internal(tname))
    table.add(data)
    return table
def __init__(
    self,
    name: str,
    index: Optional[IndexTable],
    base: Optional[BaseColumn] = None,
    storagegroup: Optional[Group] = None,
    dshape: Optional[Union[None, DataShape, str]] = None,
    fillvalue: Optional[Any] = None,
    shape: Optional[Shape] = None,
    chunks: Optional[Chunks] = None,
    indices: Optional[Index] = None,
    data: Optional[Any] = None,
) -> None:
    """Create a new column.

    If ``index`` is None, a new IndexTable and dataset are created
    (via ``_complete_column``); otherwise the column attaches to the
    existing index's storage.

    :raises ValueError: when ``indices`` and ``data`` lengths disagree.
    """
    indexwasnone: bool = index is None
    if index is None:
        if data is not None:
            # Validate lengths before creating any storage.
            length = len(data)
            if indices and length != len(indices):
                # Bug fix: the message was passed printf-style to
                # ValueError and never interpolated; format it explicitly.
                raise ValueError(
                    "Bad index length (%d/%d)" % (len(indices), length)
                )
        index = IndexTable()
    super(Column, self).__init__(name, index, base=base)
    if storagegroup is None:
        if index is not None and hasattr(index, "storagegroup"):
            # i.e. isinstance(index, Table)
            storagegroup = getattr(index, "storagegroup")
            assert isinstance(storagegroup, Group)
        else:
            assert Group.default
            storagegroup = Group.default(name=get_random_name("column_"))
    self._storagegroup = storagegroup
    self.dataset: Optional[Dataset] = None
    self._dshape: DataShape = EMPTY_DSHAPE
    if isinstance(dshape, DataShape):
        self._dshape = dshape
    elif isinstance(dshape, str):
        self._dshape = dshape_create(dshape)
    if indexwasnone:
        # No pre-existing index: allocate the backing dataset.
        self._complete_column(dshape, fillvalue, shape, chunks, data)
    if data is not None:
        self.append(data, indices)
def setUp(self) -> None:
    """Prepare the default scheduler and storage group for each test."""
    super(TestTable, self).setUp()
    assert Group.default is not None
    self.scheduler_ = Scheduler.default
    self.storagegroup = Group.default()
def __init__(self, name=None, group=None, scheduler=None, tracer=None,
             predictor=None, storage=None, storagegroup=None,
             input_descriptors=None, output_descriptors=None, **kwds):
    """Create a Module and register it with its scheduler.

    Every argument defaults to the framework-wide default object
    (scheduler, predictor, storage, tracer, storage group) when not
    supplied.  Extra keyword arguments are consumed by
    ``_parse_parameters``.

    :raises ProgressiveError: when a module with the same name already
        exists in the scheduler.
    """
    if scheduler is None:
        scheduler = BaseScheduler.default
    self._scheduler = scheduler
    if name is None:
        # Derive a unique name from the module's pretty type name.
        name = self._scheduler.generate_name(self.pretty_typename())
    if self._scheduler.exists(name):
        raise ProgressiveError('module already exists in scheduler,'
                               ' delete it first')
    self._name = name
    if predictor is None:
        predictor = TimePredictor.default()
    predictor.name = name
    self.predictor = predictor
    if storage is None:
        storage = StorageManager.default
    self.storage = storage
    if storagegroup is None:
        # Randomly-named internal group used by the tracer.
        storagegroup = Group.default_internal(
            get_random_name(name + '_tracer'))
    self.storagegroup = storagegroup
    if tracer is None:
        tracer = Tracer.default(name, storagegroup)
    # always present
    input_descriptors = input_descriptors or []
    output_descriptors = output_descriptors or []
    # Every module exposes a trace output slot and a parameters input slot.
    output_descriptors += [
        SlotDescriptor(Module.TRACE_SLOT, type=BaseTable, required=False)
    ]
    input_descriptors += [
        SlotDescriptor(Module.PARAMETERS_SLOT, type=BaseTable, required=False)
    ]
    self.order = None
    self._group = group
    self.tracer = tracer
    self._start_time = None
    self._end_time = None
    self._last_update = 0
    self._state = Module.state_created
    self._had_error = False
    self._parse_parameters(kwds)
    # Validate and index the slot descriptors by name.
    self._input_slots = self._validate_descriptors(input_descriptors)
    self.input_descriptors = {d.name: d for d in input_descriptors}
    self._output_slots = self._validate_descriptors(output_descriptors)
    self.output_descriptors = {d.name: d for d in output_descriptors}
    self.default_step_size = 100
    self.input = InputSlots(self)
    self.output = OutputSlots(self)
    self.steps_acc = 0
    # callbacks
    self._start_run = None
    self._end_run = None
    # NOTE(review): lock comes from the scheduler — presumably guards
    # concurrent state updates; confirm against scheduler implementation.
    self._synchronized_lock = self.scheduler().create_lock()
    self._add_module()
def __init__(
    self,
    name: Optional[str] = None,
    group: Optional[str] = None,
    scheduler: Optional[Scheduler] = None,
    storagegroup: Optional[Group] = None,
    **kwds: Any,
) -> None:
    """Create a Module and register it in the scheduler's dataflow.

    Defaults for the scheduler, predictor, storage manager, tracer and
    storage group come from the framework-wide default objects.  Extra
    keyword arguments are consumed by ``_parse_parameters``.

    :raises ProgressiveError: when the scheduler has no dataflow, or a
        module with the same name already exists in it.
    """
    # Declared (unassigned) here; presumably filled by _parse_parameters
    # — TODO confirm.
    self._args: Sequence[Tuple[str, Any]]
    self._kwds: Dict[str, Any]
    if scheduler is None:
        scheduler = Scheduler.default
    self._scheduler: Scheduler = scheduler
    if scheduler.dataflow is None:
        raise ProgressiveError("No valid context in scheduler")
    dataflow: Dataflow = scheduler.dataflow
    if name is None:
        # Derive a unique name from the module's pretty type name.
        name = dataflow.generate_name(self.pretty_typename())
    elif name in dataflow:
        raise ProgressiveError(
            "module already exists in scheduler," " delete it first"
        )
    self.name = name  # need to set the name so exception can remove it
    predictor = TimePredictor.default()
    predictor.name = name
    self.predictor = predictor
    storage = StorageManager.default
    self.storage = storage
    if storagegroup is None:
        assert Group.default_internal is not None
        # Randomly-named internal group used by the tracer.
        storagegroup = Group.default_internal(get_random_name(name + "_tracer"))
    self.storagegroup: Group = storagegroup
    tracer = Tracer.default(name, storagegroup)
    # Snapshot the currently active tags for this module.
    self.tags = set(ModuleTag.tags)
    self.order: int = -1
    self.group: Optional[str] = group or GroupContext.group
    self.tracer = tracer
    self._start_time: float = 0
    self._end_time: float = 0
    self._last_update: int = 0
    self._state: ModuleState = Module.state_created
    self._saved_state: ModuleState = Module.state_invalid
    self._had_error = False
    self._parse_parameters(kwds)
    # always present
    input_descriptors = self.all_inputs
    output_descriptors = self.all_outputs
    # Validate and index the slot descriptors by name.
    self._input_slots: Dict[str, Optional[Slot]] = self._validate_descriptors(
        input_descriptors
    )
    self.input_descriptors: Dict[str, SlotDescriptor] = {
        d.name: d for d in input_descriptors
    }
    # self.input_multiple: Dict[str, int] = {
    #     d.name: 0 for d in input_descriptors if d.multiple
    # }
    self._output_slots: Dict[
        str, Optional[List[Slot]]
    ] = self._validate_descriptors(output_descriptors)
    self.output_descriptors: Dict[str, SlotDescriptor] = {
        d.name: d for d in output_descriptors
    }
    self.default_step_size: int = 100
    self.input = InputSlots(self)
    self.output = OutputSlots(self)
    self.steps_acc: int = 0
    # self.wait_expr = aio.FIRST_COMPLETED
    self.context: Optional[_Context] = None
    # callbacks
    self._start_run = ModuleCallbackList()
    self._after_run = ModuleCallbackList()
    self._ending: List[ModuleCb] = []
    # Register module
    dataflow.add_module(self)
def setUp(self):
    """Prepare the default scheduler and storage group for each test."""
    super(TestTable, self).setUp()
    self.storagegroup = Group.default()
    self.scheduler = Scheduler.default