def uuid(self):
    """Return the UUID of this object, computing and caching it on first use.

    The identifier is derived from the name UUID concatenated with the
    history UUID. Datasets sharing the same name and fields can appear more
    than once in storage when different models produced different
    predictions/transformations; mixing the history UUID into the value
    keeps those rows distinct.

    :return: the cached value of ``self._uuid``
    """
    cached = self._uuid
    if cached is not None:
        return cached
    combined = self.name_uuid() + self.history_uuid()
    # NOTE(review): the call below is expected to resolve to the hashing
    # helper imported by the file, not to this method -- confirm.
    self._uuid = uuid(combined.encode())
    return self._uuid
def field_uuid(self, field_name):
    """Return the UUID of the matrix/vector stored under ``field_name``.

    The value is cached in the instance dictionary under the key
    ``'uuid' + field_name``. When that slot holds ``None`` the UUID is
    computed from ``self.field_dump(field_name)`` and written back.

    :param field_name: Case sensitive.
    :return: UUID
    :raises KeyError: if the instance has no ``'uuid' + field_name`` slot.
    """
    slot = 'uuid' + field_name
    state = self.__dict__
    value = state[slot]
    if value is None:
        value = uuid(self.field_dump(field_name))
        state[slot] = value
    return value
def store_metadata(self, data: Data):
    """Insert the metadata rows that describe ``data``.

    Intended to be used before Data is stored. Three "insert or ignore"
    statements are issued, in this order: the packed column list (``attr``),
    the dataset name (``dataset``) and the transformation history
    (``hist``). Warnings raised while each statement runs are suppressed.

    :param data: object providing ``columns``, ``name``, ``name_uuid()``,
        ``history_uuid()`` and ``history_dump()``
    :return: None
    """
    attr_sql = ''' insert or ignore into attr values ( NULL, ?, ? );'''
    dataset_sql = ''' insert or ignore into dataset values ( NULL, ?, ?, ? );'''
    hist_sql = ''' insert or ignore into hist values ( NULL, ?, ? )'''

    # attr ---------------------------------------------------------
    # TODO: avoid sending long cols blob when unneeded
    packed_cols = zlibext_pack(data.columns)
    cols_id = uuid(packed_cols)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        self.query(attr_sql, [cols_id, packed_cols])

    # dataset ------------------------------------------------------
    # TODO: avoid sending long names when unneeded
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        self.query(dataset_sql, [data.name_uuid(), data.name, cols_id])

    # history ------------------------------------------------------
    # TODO: avoid sending long hist blob when unneeded
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        self.query(hist_sql, [data.history_uuid(), data.history_dump()])
def get_data_by_name_impl(self, name, fields=None, history=None):
    """Fetch a stored dataset by name and history.

    To just recover the original dataset you can pass history=None.
    Specify fields if you want to reduce traffic, otherwise every
    single-letter column that is not None in the matching row is fetched.

    ps. 1: Obviously, when getting prediction data (i.e., results), the
    history which led to the predictions should be provided.

    :param name: dataset name, matched against the ``des`` column
    :param fields: None=get full Data; otherwise a comma-separated string,
        e.g. 'X,y,Z'. NOTE(review): earlier documentation called this case
        insensitive, but each name is used as given to index the row --
        confirm against the row type returned by ``get_one()``.
    :param history: nested tuples
    :return: a ``Data`` object, or None when no row matches
    """
    history_id = uuid(zlibext_pack(history))
    lookup_sql = ''' select X,Y,Z,P,U,V,W,Q,R,S,l,m,T,C,cols,des from data left join dataset on dataset=dsid left join attr on attr=aid where des=? and hist=?'''
    self.query(lookup_sql, [name, history_id])
    record = self.get_one()
    if record is None:
        return None

    # Decide which matrices/vectors to recover.
    if fields is not None:
        wanted = fields.split(',')
    else:
        wanted = [
            col for col, ref in record.items()
            if len(col) == 1 and ref is not None
        ]

    kwargs = {'name': name, 'history': history}
    for field in wanted:
        mat_id = record[field]
        if mat_id is None:
            # Requested field has no stored matrix; leave it out.
            continue
        self.query('select val,w,h from mat where mid=?', [mat_id])
        mat = self.get_one()
        kwargs[field] = unpack_data(mat['val'], mat['w'], mat['h'])

    return Data(columns=zlibext_unpack(record['cols']), **kwargs)
def __init__(self, config):
    """Initialise the component from a configuration mapping.

    A copy of ``config`` is kept; the caller's mapping is not modified.
    'mark', 'max_time', 'class', 'module' and 'random_state' are reserved
    words. 'mark' should not identify components, it only marks results,
    so it is removed (together with 'max_time') before the configuration
    is serialized to derive ``self.uuid``.

    :param config: mapping with the component settings
    """
    own_config = config.copy()
    self.config = own_config

    # Reserved entries are moved out of the identifying configuration.
    self.mark = own_config.pop('mark', None)
    self.max_time = own_config.pop('max_time', None)

    # A deterministic component is not identified by its random_state.
    if self.isdeterministic():
        own_config.pop('random_state', None)

    own_config.setdefault('class', self.__class__.__name__)
    own_config.setdefault('module', self.__module__)

    self._serialized = json_pack(own_config)
    self.uuid = uuid(self._serialized.encode())

    self.name = own_config['class']
    self.module = own_config['module']

    # a = Apply, u = Use
    self._modified = {'a': None, 'u': None}

    # self.model here refers to classifiers, preprocessors and, possibly,
    # some representation of pipelines or the autoML itself.
    # Another possibility is to generalize modules to a new class Model()
    # that has self.model.
    self.unfit = True
    self.model = None

    # Each apply() uses a different training data, so this uuid is mutable
    self._train_data_uuid__mutable = None

    self.locked_by_others = False
    self.failed = False
    self.time_spent = None
    self.host = None
    self.failure = None
    self._param = None
def store_result_impl(self, component, input_data, output_data):
    """Store a result and remove lock.

    Steps, in order:

    1. store ``output_data`` (when it is not None) through
       ``store_data_impl``;
    2. when ``self._dump`` is set, insert the packed component into
       ``inst``;
    3. when ``component.failure`` is not None, insert it into ``log``;
    4. update the matching ``res`` row with the outcome.

    "Absent" consistently means ``is None`` here: a falsy but non-None
    ``output_data`` or an empty ``component.failure`` string is still
    identified by its UUID instead of being written to the row as-is.

    :param component: provides ``uuid``, ``op``, ``mark``, ``failure``,
        ``failed``, ``time_spent`` and ``train_data_uuid__mutable()``
    :param input_data: provides ``uuid()``; used to locate the ``res`` row
    :param output_data: resulting data, or None when there is none
    :return: None
    """
    # Store resulting Data
    if output_data is not None:
        # We call impl here,
        # to avoid nested storage trying to do the same work twice.
        self.store_data_impl(output_data)
        output_uuid = output_data.uuid()
    else:
        output_uuid = None

    # Remove lock and point result to data inserted above.
    # We should set all timestamp fields even if with the same old value,
    # due to automatic updates by DBMSs.
    # Data train was inserted and dtr was created when locking().
    # NOTE(review): `now` is interpolated into the SQL text below, so it is
    # presumably a SQL expression supplied by the backend -- confirm.
    now = self._now_function()

    # Store dump if requested.
    if self._dump:
        dump_uuid = uuid(
            (component.uuid + component.train_data_uuid__mutable()).encode()
        )
        sql = 'insert or ignore into inst values (null, ?, ?)'
        # pack_comp is nondeterministic and its result is big,
        # so we need to identify it by other, deterministic, means
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.query(sql, [dump_uuid, pack_comp(component)])
    else:
        dump_uuid = None

    # Store a log if apply() failed.
    if component.failure is not None:
        log_uuid = uuid(component.failure.encode())
        sql = f'insert or ignore into log values (null, ?, ?, {now})'
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.query(sql, [log_uuid, component.failure])
    else:
        log_uuid = None

    # Unlock and save result.
    fail = 1 if component.failed else 0
    sql = f'''
        update res set
            log=?, dout=?, spent=?, inst=?, fail=?,
            start=start, end={now}, alive={now}, mark=?
        where
            config=? and op=? and dtr=? and din=?
        '''
    set_args = [
        log_uuid, output_uuid, component.time_spent, dump_uuid, fail,
        component.mark
    ]
    where_args = [
        component.uuid, component.op,
        component.train_data_uuid__mutable(), input_data.uuid()
    ]
    # TODO: is there any important exception to handle here?
    self.query(sql, set_args + where_args)
    print(self.name, 'Stored!\n')
def history_uuid(self):
    """Return the UUID of the history dump, computed once and then cached.

    :return: the value held in ``self._history_uuid``
    """
    if self._history_uuid is not None:
        return self._history_uuid
    dump = self.history_dump()
    self._history_uuid = uuid(dump)
    return self._history_uuid
def name_uuid(self):
    """Return the UUID of the encoded name, computed once and then cached.

    :return: the value held in ``self._name_uuid``
    """
    if self._name_uuid is not None:
        return self._name_uuid
    encoded_name = self.name.encode()
    self._name_uuid = uuid(encoded_name)
    return self._name_uuid