Example #1
    def __init__(self,
                 name,
                 index,
                 base=None,
                 storagegroup=None,
                 dshape=None,
                 fillvalue=None,
                 shape=None,
                 chunks=None,
                 data=None,
                 indices=None):
        """Create a new column.

        If index is None and self.index returns None, a new index and
        dataset are created.
        """
        super(Column, self).__init__(name, index, base=base)
        if storagegroup is None:
            if index is not None:
                storagegroup = index.storagegroup
            else:
                storagegroup = Group.default(name=get_random_name('column_'))
        self._storagegroup = storagegroup
        self.dataset = None
        self._dshape = None
        if self.index is None:
            if data is not None:  # check before creating everything
                length = len(data)
                if indices and length != len(indices):
                    raise ValueError('Bad index length (%d/%d)' %
                                     (len(indices), length))
            self._complete_column(dshape, fillvalue, shape, chunks, data)
            if data is not None:
                self.append(data, indices)
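
A minimal usage sketch for the constructor above. The import path and the use of a NumPy array as data are assumptions, not taken from this snippet:

import numpy as np
from progressivis.table.column import Column  # assumed import path

# index=None, so __init__ builds its own index and dataset via
# _complete_column() and then appends the ten values.
col = Column('measure', None,
             dshape='float64',
             data=np.arange(10, dtype=np.float64))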
Example #2
    def __init__(
        self,
        name: Optional[str],
        data: Any = None,
        dshape: Optional[Union[str, DataShape]] = None,
        fillvalues: Optional[Dict[str, Any]] = None,
        storagegroup: Optional[Group] = None,
        chunks: Optional[Chunks] = None,
        create: Optional[bool] = None,
        indices: Optional[Index] = None,
    ):
        # pylint: disable=too-many-arguments, too-many-branches
        super(Table, self).__init__()
        if not (fillvalues is None or isinstance(fillvalues, Mapping)):
            raise ValueError(
                "Invalid fillvalues (%s) should be None or a dictionary" %
                fillvalues)
        if not (chunks is None or isinstance(chunks,
                                             (integer_types, Mapping))):
            raise ValueError(
                "Invalid chunks (%s) should be None or a dictionary" % chunks)
        if data is not None:
            if create is not None and create is not True:
                logger.warning("creating a Table with data and create=False")
            create = True

        self._chunks = chunks
        # self._nrow = 0
        self._name: str = get_random_name("table_") if name is None else name
        # TODO: attach all randomly named tables to a dedicated, common parent node
        if not (storagegroup is None or isinstance(storagegroup, Group)):
            raise ValueError(
                "Invalid storagegroup (%s) should be None or a Group" %
                storagegroup)
        if storagegroup is None:
            assert Group.default
            storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError("Cannot get a valid default storage Group")
        self._storagegroup = storagegroup
        if dshape is None:
            if data is None:
                self._dshape = EMPTY_DSHAPE
            else:
                data = self.parse_data(data)
                self._dshape = dshape_extract(data) or EMPTY_DSHAPE
        else:
            self._dshape = dshape_create(dshape)
            assert dshape_table_check(self._dshape)
        if create and self._dshape is EMPTY_DSHAPE:
            raise ValueError("Cannot create a table without a dshape")
        if self._dshape is EMPTY_DSHAPE or (not create and metadata.ATTR_TABLE
                                            in self._storagegroup.attrs):
            self._load_table()
        else:
            self._create_table(fillvalues or {})
        if data is not None:
            self.append(data, indices=indices)
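
A hedged usage sketch for this constructor. The import path and the assumption that parse_data accepts a dict of equal-length columns are mine, not the snippet's:

import numpy as np
from progressivis.table import Table  # assumed import path

# Because data is given, create is forced to True; the dshape is
# inferred by dshape_extract() and the rows are appended at the end.
measures = Table('measures',
                 data={'x': np.arange(5, dtype=np.float64),
                       'y': np.ones(5, dtype=np.float64)})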
Example #3
    def __init__(self,
                 name,
                 data=None,
                 dshape=None,
                 fillvalues=None,
                 storagegroup=None,
                 chunks=None,
                 create=None,
                 indices=None):
        # pylint: disable=too-many-arguments, too-many-branches
        super(Table, self).__init__()
        if not (fillvalues is None or isinstance(fillvalues, Mapping)):
            raise ValueError(
                'Invalid fillvalues (%s) should be None or a dictionary' %
                fillvalues)
        if not (chunks is None or isinstance(chunks,
                                             (integer_types, Mapping))):
            raise ValueError(
                'Invalid chunks (%s) should be None, an integer or a dictionary'
                % chunks)
        if data is not None:
            if create is not None and create is not True:
                logger.warning('creating a Table with data and create=False')
            create = True

        self._chunks = chunks
        # self._nrow = 0
        self._name = get_random_name('table_') if name is None else name
        # TODO: attach all randomly named tables to a dedicated, common parent node
        if not (storagegroup is None or isinstance(storagegroup, Group)):
            raise ValueError(
                'Invalid storagegroup (%s) should be None or a Group' %
                storagegroup)
        if storagegroup is None:
            storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError('Cannot get a valid default storage Group')
        self._storagegroup = storagegroup
        if dshape is None:
            if data is None:
                self._dshape = None
            else:
                data = self.parse_data(data)
                self._dshape = dshape_extract(data)
        else:
            self._dshape = dshape_create(dshape)
            assert dshape_table_check(self._dshape)
        if create and self._dshape is None:
            raise ValueError('Cannot create a table without a dshape')
        if self._dshape is None or (not create and metadata.ATTR_TABLE
                                    in self._storagegroup.attrs):
            self._load_table()
        else:
            self._create_table(fillvalues or {})
        if data is not None:
            self.append(data, indices=indices)
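
The same constructor can also be driven by an explicit dshape string instead of data; a sketch with illustrative column names and fill values:

from progressivis.table import Table  # assumed import path

# An empty table is created from the dshape; fillvalues must be a
# Mapping, otherwise the ValueError above is raised.
empty = Table('empty_measures',
              dshape='{x: float64, y: int64}',
              fillvalues={'x': 0.0, 'y': -1},
              create=True)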
Example #4
def _create_table(tname: str, columns: Parameters) -> Table:
    dshape = ""
    data = {}
    for (name, dtype, val) in columns:
        if dshape:
            dshape += ","
        dshape += "%s: %s" % (name, dshape_from_dtype(dtype))
        data[name] = val
    dshape = "{" + dshape + "}"
    assert Group.default_internal
    table = Table(tname, dshape=dshape, storagegroup=Group.default_internal(tname))
    table.add(data)
    return table
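
A hypothetical call showing the (name, dtype, value) triples the helper iterates over; the column names, dtypes and values are made up for illustration:

import numpy as np

# Builds a dshape along the lines of '{count: int64, mean: float64}'
# and adds a single row holding the given values.
params = _create_table('my_params',
                       [('count', np.dtype('int64'), 0),
                        ('mean', np.dtype('float64'), 0.0)])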
Example #5
def _create_table(tname, columns):
    dshape = ""
    data = {}
    for (name, dtype, val) in columns:
        if dshape:
            dshape += ','
        dshape += '%s: %s' % (name, dshape_from_dtype(dtype))
        data[name] = val
    dshape = '{' + dshape + '}'
    table = Table(tname,
                  dshape=dshape,
                  storagegroup=Group.default_internal(tname))
    table.add(data)
    return table
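
For reference, the loop above assembles the record dshape one column at a time; a trace for a two-column spec (the type spellings assume dshape_from_dtype returns the plain type name):

# columns = [('a', np.dtype('float64'), 1.0), ('b', np.dtype('int64'), 2)]
# after column 'a': dshape == 'a: float64'
# after column 'b': dshape == 'a: float64,b: int64'
# final value:      dshape == '{a: float64,b: int64}'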
Example #6
    def __init__(
        self,
        name: str,
        index: Optional[IndexTable],
        base: Optional[BaseColumn] = None,
        storagegroup: Optional[Group] = None,
        dshape: Optional[Union[str, DataShape]] = None,
        fillvalue: Optional[Any] = None,
        shape: Optional[Shape] = None,
        chunks: Optional[Chunks] = None,
        indices: Optional[Index] = None,
        data: Optional[Any] = None,
    ) -> None:
        """Create a new column.

        If index is None and self.index returns None, a new index and
        dataset are created.
        """
        indexwasnone: bool = index is None
        if index is None:
            if data is not None:  # check before creating everything
                length = len(data)
                if indices and length != len(indices):
                    raise ValueError("Bad index length (%d/%d)", len(indices),
                                     length)
            index = IndexTable()
        super(Column, self).__init__(name, index, base=base)
        if storagegroup is None:
            if index is not None and hasattr(index, "storagegroup"):
                # i.e. isinstance(index, Table)
                storagegroup = getattr(index, "storagegroup")
                assert isinstance(storagegroup, Group)
            else:
                assert Group.default
                storagegroup = Group.default(name=get_random_name("column_"))
        self._storagegroup = storagegroup
        self.dataset: Optional[Dataset] = None
        self._dshape: DataShape = EMPTY_DSHAPE
        if isinstance(dshape, DataShape):
            self._dshape = dshape
        elif isinstance(dshape, str):
            self._dshape = dshape_create(dshape)
        if indexwasnone:
            self._complete_column(dshape, fillvalue, shape, chunks, data)
            if data is not None:
                self.append(data, indices)
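
A sketch of the two dshape forms this version normalizes, assuming dshape_create is an alias for the datashape parser and the import paths shown (all assumptions):

import numpy as np
from datashape import dshape as dshape_create   # assumed alias
from progressivis.table.column import Column    # assumed import path

# A str is parsed via dshape_create(); a DataShape instance is used as-is.
col_a = Column('a', None, dshape='float64', data=np.zeros(3))
col_b = Column('b', None, dshape=dshape_create('float64'), data=np.zeros(3))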
Example #7
    def setUp(self) -> None:
        super(TestTable, self).setUp()
        self.scheduler_ = Scheduler.default
        assert Group.default is not None
        self.storagegroup = Group.default()
Example #8
    def __init__(self,
                 name=None,
                 group=None,
                 scheduler=None,
                 tracer=None,
                 predictor=None,
                 storage=None,
                 storagegroup=None,
                 input_descriptors=None,
                 output_descriptors=None,
                 **kwds):
        if scheduler is None:
            scheduler = BaseScheduler.default
        self._scheduler = scheduler
        if name is None:
            name = self._scheduler.generate_name(self.pretty_typename())
        if self._scheduler.exists(name):
            raise ProgressiveError('module already exists in scheduler,'
                                   ' delete it first')
        self._name = name
        if predictor is None:
            predictor = TimePredictor.default()
        predictor.name = name
        self.predictor = predictor
        if storage is None:
            storage = StorageManager.default
        self.storage = storage
        if storagegroup is None:
            storagegroup = Group.default_internal(
                get_random_name(name + '_tracer'))
        self.storagegroup = storagegroup
        if tracer is None:
            tracer = Tracer.default(name, storagegroup)

        # always present
        input_descriptors = input_descriptors or []
        output_descriptors = output_descriptors or []
        output_descriptors += [
            SlotDescriptor(Module.TRACE_SLOT, type=BaseTable, required=False)
        ]
        input_descriptors += [
            SlotDescriptor(Module.PARAMETERS_SLOT,
                           type=BaseTable,
                           required=False)
        ]
        self.order = None
        self._group = group
        self.tracer = tracer
        self._start_time = None
        self._end_time = None
        self._last_update = 0
        self._state = Module.state_created
        self._had_error = False
        self._parse_parameters(kwds)
        self._input_slots = self._validate_descriptors(input_descriptors)
        self.input_descriptors = {d.name: d for d in input_descriptors}
        self._output_slots = self._validate_descriptors(output_descriptors)
        self.output_descriptors = {d.name: d for d in output_descriptors}
        self.default_step_size = 100
        self.input = InputSlots(self)
        self.output = OutputSlots(self)
        self.steps_acc = 0
        # callbacks
        self._start_run = None
        self._end_run = None
        self._synchronized_lock = self.scheduler().create_lock()
        self._add_module()
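
A sketch of how a subclass might feed this constructor its slots; the slot name 'table' and the choice of BaseTable are placeholders:

# Module, SlotDescriptor and BaseTable come from the same scope as the
# snippet above.
class MyModule(Module):
    def __init__(self, **kwds):
        descriptors = [SlotDescriptor('table', type=BaseTable, required=True)]
        super(MyModule, self).__init__(input_descriptors=descriptors, **kwds)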
Example #9
    def __init__(
        self,
        name: Optional[str] = None,
        group: Optional[str] = None,
        scheduler: Optional[Scheduler] = None,
        storagegroup: Optional[Group] = None,
        **kwds: Any,
    ) -> None:
        self._args: Sequence[Tuple[str, Any]]
        self._kwds: Dict[str, Any]
        if scheduler is None:
            scheduler = Scheduler.default
        self._scheduler: Scheduler = scheduler
        if scheduler.dataflow is None:
            raise ProgressiveError("No valid context in scheduler")
        dataflow: Dataflow = scheduler.dataflow
        if name is None:
            name = dataflow.generate_name(self.pretty_typename())
        elif name in dataflow:
            raise ProgressiveError(
                "module already exists in scheduler, delete it first"
            )
        self.name = name  # need to set the name so exception can remove it
        predictor = TimePredictor.default()
        predictor.name = name
        self.predictor = predictor
        storage = StorageManager.default
        self.storage = storage
        if storagegroup is None:
            assert Group.default_internal is not None
            storagegroup = Group.default_internal(get_random_name(name + "_tracer"))
        self.storagegroup: Group = storagegroup
        tracer = Tracer.default(name, storagegroup)

        self.tags = set(ModuleTag.tags)
        self.order: int = -1
        self.group: Optional[str] = group or GroupContext.group
        self.tracer = tracer
        self._start_time: float = 0
        self._end_time: float = 0
        self._last_update: int = 0
        self._state: ModuleState = Module.state_created
        self._saved_state: ModuleState = Module.state_invalid
        self._had_error = False
        self._parse_parameters(kwds)

        # always present
        input_descriptors = self.all_inputs
        output_descriptors = self.all_outputs
        self._input_slots: Dict[str, Optional[Slot]] = self._validate_descriptors(
            input_descriptors
        )
        self.input_descriptors: Dict[str, SlotDescriptor] = {
            d.name: d for d in input_descriptors
        }
        # self.input_multiple: Dict[str, int] = {
        #     d.name: 0 for d in input_descriptors if d.multiple
        # }
        self._output_slots: Dict[
            str, Optional[List[Slot]]
        ] = self._validate_descriptors(output_descriptors)
        self.output_descriptors: Dict[str, SlotDescriptor] = {
            d.name: d for d in output_descriptors
        }
        self.default_step_size: int = 100
        self.input = InputSlots(self)
        self.output = OutputSlots(self)
        self.steps_acc: int = 0
        # self.wait_expr = aio.FIRST_COMPLETED
        self.context: Optional[_Context] = None
        # callbacks
        self._start_run = ModuleCallbackList()
        self._after_run = ModuleCallbackList()
        self._ending: List[ModuleCb] = []
        # Register module
        dataflow.add_module(self)
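
In this version the descriptors come from all_inputs/all_outputs and the module registers itself on the scheduler's dataflow. A construction sketch, assuming MyModule is a concrete subclass declaring its slots at class level and that a freshly created Scheduler exposes a valid dataflow (both assumptions):

scheduler = Scheduler()  # assumed to provide a non-None dataflow
# name is None here, so dataflow.generate_name() picks one; reusing an
# existing name would raise ProgressiveError instead.
m = MyModule(scheduler=scheduler)
print(m.name)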
Example #10
    def setUp(self):
        super(TestTable, self).setUp()
        self.scheduler = Scheduler.default
        self.storagegroup = Group.default()