class ObjectFile(Document):
    """Inventory object: a named file attached to an inventory Object."""
    meta = {
        "collection": "noc.objectfiles",
        "strict": False,
        "auto_create_index": False,
        "indexes": ["object"]
    }
    # Id of the owning inventory object
    object = ObjectIdField()
    name = StringField()
    file = FileField()
    ts = DateTimeField()
    description = StringField()
    size = IntField()
    mime_type = StringField()

    def __unicode__(self):
        label = self.name or self.id
        return unicode(label)

    def delete_file(self):
        """Drop the GridFS payload, if one is attached."""
        if self.file:
            self.file.delete()

    @classmethod
    def delete_files(cls, sender, document, target=None):
        """Signal handler: cascade-delete every file owned by *document*."""
        for attachment in ObjectFile.objects.filter(object=document.id):
            attachment.delete_file()
            attachment.delete()

    @classmethod
    def on_delete(cls, sender, document, target=None):
        """Signal handler: release the GridFS payload of *document*."""
        document.delete_file()
class BlackboxModel(Document):
    """A class which describes the model of a Blackbox inside MongoDB."""
    model_id = StringField(unique=True, required=True)
    # Pass the callable itself (not its result): `utcnow()` would be
    # evaluated once at import time and every document would share that
    # single frozen timestamp.
    creation_date = DateTimeField(default=datetime.datetime.utcnow)
    last_update_date = DateTimeField()
    models = ListField(StringField(), required=True)
    columns = ListField(StringField(), required=True)
    trained = BooleanField(default=False)
    saved = FileField()

    meta = {"allow_inheritance": True}

    def to_dict(self):
        """Return a JSON-serializable summary of this model document."""
        return {
            "model_id": self.model_id,
            "creation_date": self.creation_date.isoformat(),
            # last_update_date is only set by the pre_save hook, so it is
            # None for documents that were never saved
            "last_update_date": (self.last_update_date.isoformat()
                                 if self.last_update_date else None),
            "models": self.models,
            "columns": self.columns,
            "trained": self.trained,
        }

    def clean(self):
        """Validate that every entry in `models` is a known model.

        Raises:
            ValidationError: if at least one model name is unknown.
        """
        if not all(model in AVAILABLE_MODELS for model in self.models):
            raise ValidationError(
                f"There is at least one model in the list of models that does not "
                f"exist. Passed models: {', '.join(self.models)}. "
                f"Available models: {', '.join(AVAILABLE_MODELS)} ")

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """mongoengine pre_save signal hook: stamp the update time."""
        document.last_update_date = datetime.datetime.utcnow()
class RecipeDoc(Document):
    """MongoDB document for a user-authored recipe."""
    # Recipe title, at least 3 characters
    title = StringField(required=True, min_length=3)
    # The user who created the recipe
    author = ReferenceField(UserDoc, required=True)
    # Optional photo, stored in GridFS
    image = FileField()
    # Ordered list of ingredient sub-documents
    ingredients = EmbeddedDocumentListField(IngredientDoc)
    # Free-form equipment names
    gear = ListField(StringField())
    # Ordered list of preparation step sub-documents
    steps = EmbeddedDocumentListField(StepDoc)
    meta = {
        'collection': 'recipes'
    }
class CmsTemplate(Document): """ Cms模板,多个页面可以使用相同的模板 """ # 站点ID siteId = ReferenceField(CmsSite, required=True) # 模版名称 templateName = StringField(required=True) # 模版参数 templateParameter = StringField() # 模版文件Id templateFileId = FileField()
class Metadata(Document):
    """
    Metadata stores information about objects in OmegaStore
    """
    # fields
    #: this is the name of the data
    name = StringField(unique_with=['bucket', 'prefix'])
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data
    kind = StringField(choices=MDREGISTRY.KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indicies
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)

    # the actual db is defined in settings, OMEGA_MONGO_URL
    meta = {
        'db_alias': 'omega',
        'indexes': [
            # unique entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __eq__(self, other):
        # Equality is by stored object id. Return NotImplemented for
        # non-Metadata operands instead of raising AttributeError so that
        # comparisons like `md == None` behave per the data model.
        if not isinstance(other, Metadata):
            return NotImplemented
        return self.objid == other.objid

    # Defining __eq__ alone would set __hash__ to None (unhashable in
    # Python 3); hash consistently with __eq__.
    def __hash__(self):
        return hash(self.objid)

    def __unicode__(self):
        fields = ('name', 'bucket', 'prefix', 'created', 'kind')
        kwargs = ('%s=%s' % (k, getattr(self, k))
                  for k in self._fields.keys() if k in fields)
        return u"Metadata(%s)" % ','.join(kwargs)
class ModelDO(Document): """Model Plain Object. The primary key of the model plain object is (engine, name, version) pair. """ # Model name name = StringField(required=True) # Supported engine enum (aka framework, e.g.: TensorFlow (0) or PyTorch (1)) framework = IntField(required=True) # ONNX or TensorRT engine = IntField(required=True) # Version of the model. e.g.: `1` version = IntField(required=True) # Dataset dataset = StringField(required=True) # Model evaluation metric metric = DictField(required=True) # Model weights weight = FileField() # Model task task = IntField(required=True) # Parent Model ID parent_model_id = StringField() # inputs standard inputs = EmbeddedDocumentListField(IOShapeDO) # outputs standard outputs = EmbeddedDocumentListField(IOShapeDO) # Profile result profile_result = EmbeddedDocumentField(ProfileResultDO) # Status enum value status = IntField(required=True) # Model Status enum value model_status = ListField() # Model provider (uploader) creator = StringField(required=True) # Creation time of this record create_time = DateTimeField(required=True) meta = { 'indexes': [{ 'fields': ('engine', 'name', 'framework', 'version', 'task', 'dataset'), 'unique': True }] }
class Metadata: """ Metadata stores information about objects in OmegaStore """ # NOTE THIS IS ONLY HERE FOR DOCUMENTATION PURPOSE. # # If you use this class to save a document, it will raise a NameError # # The actual Metadata class is created in make_Metadata() below. # Rationale: If we let mongoengine create Metadata here the class # is bound to a specific MongoClient instance. Using make_Metadata # binds the class to the specific instance that exists at the time # of creation. Open to better ways. # fields #: this is the name of the data name = StringField(unique_with=['bucket', 'prefix']) #: bucket bucket = StringField() #: prefix prefix = StringField() #: kind of data kind = StringField(choices=MDREGISTRY.KINDS) #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile gridfile = FileField( db_alias='omega', collection_name=settings().OMEGA_MONGO_COLLECTION) #: for PANDAS_DFROWS this is the collection collection = StringField() #: for PYTHON_DATA this is the actual document objid = ObjectIdField() #: omegaml technical attributes, e.g. column indicies kind_meta = DictField() #: customer-defined other meta attributes attributes = DictField() #: s3file attributes s3file = DictField() #: location URI uri = StringField() #: created datetime created = DateTimeField(default=datetime.datetime.now) #: created datetime modified = DateTimeField(default=datetime.datetime.now)
class Metadata(Metadata_base, Document):
    """Concrete Metadata bound to the runtime-selected MongoDB connection."""
    # override db_alias in gridfile so file payloads go to the runtime db
    gridfile = FileField(db_alias=db_alias, collection_name=collection)

    # the actual db is defined at runtime
    meta = {
        'db_alias': db_alias,
        'strict': False,
        'indexes': [
            # unique entry
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __new__(cls, *args, **kwargs):
        # undo the Metadata.__new__ protection
        # NOTE(review): __real_new__ is presumably stashed by the base
        # class to bypass its instantiation guard — confirm against the
        # surrounding make_Metadata() factory
        newcls = super(Metadata, cls).__real_new__(cls)
        return newcls

    def __eq__(self, other):
        # equality is by stored object id only; assumes `other` exposes
        # an `objid` attribute (raises AttributeError otherwise)
        return self.objid == other.objid

    def __unicode__(self):
        fields = ('name', 'bucket', 'prefix', 'created', 'kind')
        kwargs = ('%s=%s' % (k, getattr(self, k))
                  for k in self._fields.keys() if k in fields)
        return u"Metadata(%s)" % ','.join(kwargs)

    def save(self, *args, **kwargs):
        """Save the document, stamping `modified`; a name is mandatory."""
        assert self.name is not None, "a dataset name is needed before saving"
        self.modified = datetime.datetime.now()
        # super(Metadata_base, ...) starts the MRO lookup after
        # Metadata_base, i.e. at Document.save
        return super(Metadata_base, self).save(*args, **kwargs)

    def to_json(self, **kwargs):
        # default to legacy JSON options for backward-compatible output
        kwargs['json_options'] = kwargs.get('json_options', LEGACY_JSON_OPTIONS)
        return super().to_json(**kwargs)

    def to_dict(self):
        """Return the document as a plain dict (mongo representation)."""
        return self.to_mongo().to_dict()
class ThemeFile(Document):
    """A named file belonging to a theme, with its payload in GridFS."""

    class Proxy(object):
        """Dict-like accessor: ``proxy[name]`` -> ThemeFile of the wrapped theme."""

        def __init__(self, theme):
            self.theme = theme

        def __getitem__(self, item):
            query = ThemeFile.objects(theme=self.theme, name=item)
            return query.first()

    theme = ReferenceField('Theme')
    name = StringField()
    file = FileField()

    def read(self, *args, **kwargs):
        """Delegate reads to the underlying GridFS file."""
        return self.file.read(*args, **kwargs)

    @property
    def content_type(self):
        """MIME type recorded on the stored file."""
        return self.file.content_type

    def delete(self, *args, **kwargs):
        """Release the GridFS payload before removing the document itself."""
        stored = self.file
        if stored:
            stored.delete()
        super(ThemeFile, self).delete(*args, **kwargs)
class CmsPage(Document): """ Cms页面信息 """ # 站点 * siteId = ReferenceField(CmsSite, required=True) # 页面名称 * pageName = StringField(required=True) # 页面别名 pageAlias = StringField(default='', null=True) # 访问地址 * pageWebPath = StringField(required=True) # 页面保存在站点的物理路径,是站点物理路径的子路径 * pagePhysicalPath = StringField(required=True) # 类型(静态/动态)* pageType = StringField(default='0') # 页面模版 pageTemplate = StringField() # 页面静态化内容 pageHtml = StringField() # 状态 pageStatus = StringField(null=True) # 创建时间 pageCreateTime = DateTimeField(default=datetime.now) # 模版id * templateId = ReferenceField(CmsTemplate, required=True) # 页面参数列表,暂未使用 pageParams = ListField() # 静态文件Id htmlFileId = FileField(null=True) # 数据Url dataUrl = StringField(null=True) @classmethod def find_by_id(cls, page_id): return cls.objects(id=page_id).first()
class BaseStudy(Document):
    """Contains data process results. Can act as a generic data archive. """
    # ID
    name = StringField(max_length=120, required=True, unique=True)
    # Data Inputs
    parents = MapField(ReferenceField('BaseStudy'))  # Precursor data used by the Analysis
    params = DictField()  # Processing parameters.
    # Data Outputs
    newfile = MapField(FileField())
    fileversions = MapField(FileField())
    versiondefault = StringField(default='default')
    _timestamp = StringField()
    valid_age = IntField(default=0)  # Zero means always valid
    valid_type = StringField(choices=['market', 'always'], default='market')
    # Run Status
    status = StringField(choices=['Idle', 'Running'], default='Idle')
    # Meta
    notes = MapField(ListField(StringField()))
    schema_version = IntField(default=1)
    meta = {
        'strict': False,
        'collection': 'study',
        'allow_inheritance': True,
    }

    def __repr__(self):
        return f"BaseStudy: {self.name}"  # pylint: disable=no-member

    def clean(self):
        """Clean attributes."""
        # Parents: drop dangling DBRef entries. Collect the keys first so
        # the mapping is not mutated while being iterated (that raises
        # RuntimeError in Python 3).
        stale = [key for key, parent in self.parents.items()
                 if isinstance(parent, DBRef)]
        for key in stale:
            del self.parents[key]
        # Timestamp display
        try:
            self._timestamp = self.timestamp.format()
        except AttributeError:
            self._timestamp = None
        # Subclassed cleaning
        self.subclean()

    def subclean(self):
        """Cleaning operations for subclasses."""
        pass

    ## Methods defining the Study ##
    def rename(self, newname):
        """Rename the Study."""
        self.name = newname
        self.save()

    ## Methods related to scheduling runs ##
    @property
    def timestamp(self):
        """Preprocess the timestamp to ensure consistency."""
        return self.get_timestamp(self.versiondefault)

    def get_timestamp(self, version):
        """Upload time of the given file version, or None if unavailable."""
        try:
            recent_file = self.fileversions.get(version)
            return arrow.get(recent_file.uploadDate).to(Config.TZ)
        except Exception:
            # best-effort: missing version or attribute simply means
            # "no timestamp" (bare except would also trap SystemExit)
            return

    @property
    def valid(self):
        """Check if the Study data is still valid."""
        # Check the age of the data
        if not self.timestamp:
            current = False
        elif self.valid_type == 'market':
            current = self.market_valid(self.timestamp)
        elif self.valid_type == 'always' or self.valid_age == 0:
            current = True
        else:
            current = arrow.now(Config.TZ) - self.timestamp < dt.timedelta(days=self.valid_age)
        # Check if the parents are valid too
        if current:
            for parent in self.parents.values():
                if not parent.valid:
                    current = False
                    break
        return current

    @staticmethod
    def market_valid(timestamp):
        """Check if the market has or hasn't progressed since the last timestamp."""
        now = arrow.now(Config.TZ)
        schedule, nyse = market_schedule(timestamp, now)
        is_open = nyse.open_at_time(schedule, now.datetime)  # Market currently open
        open_close_dt = pd.DataFrame([], index=schedule.values.flatten())  ## Market day boundaries
        if is_open:
            ## If the market is open, the data should be refreshed
            return False
        elif len(open_close_dt[timestamp.datetime:now.datetime]) > 0:
            ## A market day boundary has passed
            return False
        else:
            ## The market hasn't changed. The data is still valid.
            return True

    @staticmethod
    def alert_overwrite(timestamp, now):
        """Check if the previous alert should be overwritten."""
        if timestamp is None:
            return False
        schedule, _ = market_schedule(timestamp, now)
        ## New alert only if a new market day has begun. Otherwise, overwrite.
        open_dt = pd.DataFrame([], index=schedule['market_open'])
        return len(open_dt[timestamp.datetime:now.datetime]) == 0

    @property
    def dependencies(self):
        """Create a dictionary of dependencies."""
        deps = {str(self.id): [str(parent.id) for parent in self.parents.values()]}
        for parent in self.parents.values():
            deps.update(parent.dependencies)
        return deps

    def run_if(self, function=None):
        """Run the Study if it's no longer valid."""
        if not self.valid:
            self.run(function)

    def run(self, function=None, force=False):
        """Run the Study process on the inputs and return any alerts."""
        raise Exception("Cannot run from BaseStudy objects.")

    ## Methods for handling inputs ##
    def add_parents(self, newparents):
        """Add all of the parents in the given dict of ids."""
        try:
            parent_objects = {key: BaseStudy.objects(name=val).get()
                              for key, val in newparents.items()}
        except InvalidDocument:
            # values were already documents, not names
            parent_objects = newparents
        self.parents.update(parent_objects)
        self.save()

    def set_parents(self, newparents):
        """Overwrite the existing parents with new ones."""
        self.parents = {}
        self.add_parents(newparents)

    def add_params(self, newparams):
        """Add all of the params in the given dict."""
        self.params.update(newparams)
        self.save()

    def remove_inputs(self, inputs):
        """Remove all of the inputs in the given iterable of names."""
        for key in inputs:
            self.parents.pop(key, None)
            self.params.pop(key, None)
        self.save()

    ## Methods for handling the saved data ##
    @property
    def version(self):
        # transient selection, not persisted; falls back to versiondefault
        return getattr(self, '_version', self.versiondefault)

    @version.setter
    def version(self, label):
        self._version = label

    @property
    def all_versions(self):
        return list(self.fileversions.keys())

    @property
    def data(self):
        """Preprocess the data field to return the data in a usable format."""
        # migrate any freshly-written file into fileversions first
        if self.newfile.get(self.version):
            self.transfer_file(self.newfile, self.fileversions)
        try:
            file_obj = self.fileversions.get(self.version).get()
            result = file_obj.read()
            file_obj.seek(0)
            # NOTE(review): pickle.loads is only safe because this archive
            # is written exclusively by write_to() below
            return pickle.loads(result)
        except Exception:
            # missing version / empty file -> no data
            return None

    @data.setter
    def data(self, newdata):
        """Process the data for storage."""
        if newdata is None:
            self.remove_files()
        else:
            self.write_version(self.newfile, newdata)
            self.transfer_file(self.newfile, self.fileversions)

    def write_to(self, field, newdata):
        """Write data to a FileField."""
        field.new_file()
        field.write(pickle.dumps(newdata))
        field.close()
        self.save()

    def write_version(self, field, newdata):
        """Write data into a mapped FileField."""
        fileslot = self.get_fileslot(field)
        self.write_to(fileslot, newdata)

    def get_fileslot(self, field):
        """Get an existing fileslot in a mapfield, or create it."""
        fileslot = field.get(self.version, GridFSProxy())
        field[self.version] = fileslot
        return fileslot

    def copy_file(self, filesrc, filedest):
        """Copy the data from filesrc to filedest."""
        newfile = filesrc.read()
        filedest.replace(newfile)
        self.save()

    def transfer_file(self, filesrc, filedest):
        """Transfer a file between FileFields, possibly within a MapField."""
        try:
            filesrc = filesrc.pop(self.version, None)
        except AttributeError:
            # filesrc was a plain FileField, not a MapField entry
            pass
        if isinstance(filedest, dict):
            filedest = self.get_fileslot(filedest)
        self.copy_file(filesrc, filedest)
        filesrc.delete()
        self.save()

    def remove_file(self, field):
        """Remove a file version from a MapField."""
        field[self.version].delete()
        del field[self.version]
        self.save()

    def remove_files(self):
        """Remove the data."""
        for field in (self.fileversions, self.newfile):
            try:
                self.remove_file(field)
            except KeyError:
                # no file stored under the current version in this field
                pass

    def rename_data(self, oldname, newname):
        """Rename the data file."""
        try:
            self.fileversions[newname].delete()
        except KeyError:
            pass
        self.fileversions[newname] = self.fileversions[oldname]
        del self.fileversions[oldname]
        self.save()

    def add_note(self, title='default', text=None):
        """Add a note to the notes field."""
        entry = self.notes.get(title, [])
        if text:
            entry.append(text)
        self.notes[title] = entry
        self.save()

    def del_note(self, title='default', index=None):
        """Delete the specified note."""
        if index is None:
            self.notes.pop(title, None)
        else:
            entry = self.notes.get(title, [])
            entry.pop(index)
        self.save()

    def read_notes(self, title=None):
        """Read the notes."""
        if title is None:
            for key in reversed(self.notes.keys()):
                self.read_notes(key)
        else:
            entry = [f"{i}: {note}"
                     for i, note in enumerate(self.notes.get(title, []))]
            print(f"{title}\n\t" + "\n\t".join(entry))
class Metadata(Document): """ Metadata stores information about objects in OmegaStore """ # default kinds of data PANDAS_DFROWS = 'pandas.dfrows' # dataframe PANDAS_SEROWS = 'pandas.serows' # series PANDAS_HDF = 'pandas.hdf' PYTHON_DATA = 'python.data' PANDAS_DFGROUP = 'pandas.dfgroup' SKLEARN_JOBLIB = 'sklearn.joblib' OMEGAML_JOBS = 'script.ipynb' SPARK_MLLIB = 'spark.mllib' OMEGAML_RUNNING_JOBS = 'job.run' #: the list of accepted data types. extend using OmegaStore.register_backend KINDS = [ PANDAS_DFROWS, PANDAS_SEROWS, PANDAS_HDF, PYTHON_DATA, SKLEARN_JOBLIB, PANDAS_DFGROUP, OMEGAML_JOBS, OMEGAML_RUNNING_JOBS, SPARK_MLLIB] # fields #: this is the name of the data name = StringField() #: bucket bucket = StringField() #: prefix prefix = StringField() #: kind of data kind = StringField(choices=KINDS) #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile gridfile = FileField( db_alias='omega', collection_name=settings().OMEGA_MONGO_COLLECTION) #: for PANDAS_DFROWS this is the collection collection = StringField() #: for PYTHON_DATA this is the actual document objid = ObjectIdField() #: omegaml technical attributes, e.g. column indicies kind_meta = DictField() #: customer-defined other meta attributes attributes = DictField() #: s3file attributes s3file = DictField() #: location URI uri = StringField() #: created datetime created = DateTimeField(default=datetime.datetime.now) # the actual db is defined in settings, OMEGA_MONGO_URL meta = { 'db_alias': 'omega', 'indexes': [ # unique entry { 'fields': ['bucket', 'prefix', 'name'], }, 'created', # most recent is last, i.e. [-1] ] } def __unicode__(self): kwargs = ('%s=%s' % (k, getattr(self, k)) for k in self._fields.keys() if k in ('bucket', 'prefix', 'created', 'kind')) return u"Metadata(%s)" % ','.join(kwargs)