Exemple #1
0
class ObjectFile(Document):
    """
    File attached to an inventory object.

    Documents live in the ``noc.objectfiles`` collection and are indexed by
    the ``object`` field.
    """
    meta = {
        "collection": "noc.objectfiles",
        "strict": False,
        "auto_create_index": False,
        "indexes": ["object"]
    }

    # Id of the document this file belongs to (see delete_files)
    object = ObjectIdField()
    # File name; also used as the textual representation when set
    name = StringField()
    # The stored file itself
    file = FileField()
    # Timestamp -- NOTE(review): presumably the upload time; confirm with callers
    ts = DateTimeField()
    description = StringField()
    size = IntField()
    mime_type = StringField()

    def __unicode__(self):
        # Fall back to the document id when no name has been set
        label = self.name or self.id
        return unicode(label)

    def delete_file(self):
        """Delete the stored file, if this document has one."""
        stored = self.file
        if not stored:
            return
        stored.delete()

    @classmethod
    def delete_files(cls, sender, document, target=None):
        """
        Remove every ObjectFile (and its stored file) whose ``object``
        field equals ``document.id``.

        NOTE(review): the signature looks like a signal handler; where it
        is connected is not visible here.
        """
        attachments = ObjectFile.objects.filter(object=document.id)
        for attachment in attachments:
            attachment.delete_file()
            attachment.delete()

    @classmethod
    def on_delete(cls, sender, document, target=None):
        """Delete the stored file of ``document``."""
        document.delete_file()
class BlackboxModel(Document):
    """A class which describes the model of a Blackbox inside MongoDB."""

    model_id = StringField(unique=True, required=True)
    # Pass the callable itself: calling utcnow() here would evaluate it once,
    # at class-definition time, and every document would share that value.
    creation_date = DateTimeField(default=datetime.datetime.utcnow)
    # Set by pre_save(); stays None until that handler has run.
    last_update_date = DateTimeField()
    models = ListField(StringField(), required=True)
    columns = ListField(StringField(), required=True)
    trained = BooleanField(default=False)
    saved = FileField()

    meta = {"allow_inheritance": True}

    def to_dict(self):
        """Return a plain-dict view of the document.

        Dates are rendered with ``isoformat()``. ``last_update_date`` is
        ``None`` in the result when it has not been set yet, instead of
        raising ``AttributeError``. The ``saved`` file is not included.
        """
        created = self.creation_date
        updated = self.last_update_date
        return {
            "model_id": self.model_id,
            "creation_date": created.isoformat() if created is not None else None,
            "last_update_date": updated.isoformat() if updated is not None else None,
            "models": self.models,
            "columns": self.columns,
            "trained": self.trained,
        }

    def clean(self):
        """Validate that every entry of ``models`` is in ``AVAILABLE_MODELS``.

        Raises:
            ValidationError: if at least one model name is unknown.
        """
        if not all(model in AVAILABLE_MODELS for model in self.models):
            raise ValidationError(
                f"There is at least one model in the list of models that does not "
                f"exist. Passed models: {', '.join(self.models)}. "
                f"Available models: {', '.join(AVAILABLE_MODELS)} ")

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Stamp ``document.last_update_date`` with the current UTC time.

        NOTE(review): the signature looks like a pre-save signal handler;
        the connection is not visible here.
        """
        document.last_update_date = datetime.datetime.utcnow()
Exemple #3
0
class RecipeDoc(Document):
    """Recipe document stored in the ``recipes`` collection."""
    # Recipe title; required, at least 3 characters long
    title = StringField(required=True, min_length=3)
    # Reference to the UserDoc that authored the recipe; required
    author = ReferenceField(UserDoc, required=True)
    # Optional image file
    image = FileField()
    # Embedded list of IngredientDoc entries
    ingredients = EmbeddedDocumentListField(IngredientDoc)
    # List of strings naming the gear for the recipe
    gear = ListField(StringField())
    # Embedded list of StepDoc entries
    steps = EmbeddedDocumentListField(StepDoc)
    meta = {
        'collection': 'recipes'
    }
class CmsTemplate(Document):
    """
    CMS template; multiple pages can use the same template.
    """
    # Site the template belongs to (reference to CmsSite, required)
    siteId = ReferenceField(CmsSite, required=True)
    # Template name (required)
    templateName = StringField(required=True)
    # Template parameters
    templateParameter = StringField()
    # Template file
    templateFileId = FileField()
Exemple #5
0
    class Metadata(Document):
        """
        Metadata stores information about objects in OmegaStore
        """

        # fields
        #: this is the name of the data; unique together with bucket and prefix
        name = StringField(unique_with=['bucket', 'prefix'])
        #: bucket
        bucket = StringField()
        #: prefix
        prefix = StringField()
        #: kind of data
        kind = StringField(choices=MDREGISTRY.KINDS)
        #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
        gridfile = FileField(
            db_alias='omega',
            collection_name=settings().OMEGA_MONGO_COLLECTION)
        #: for PANDAS_DFROWS this is the collection
        collection = StringField()
        #: for PYTHON_DATA this is the actual document
        objid = ObjectIdField()
        #: omegaml technical attributes, e.g. column indices
        kind_meta = DictField()
        #: customer-defined other meta attributes
        attributes = DictField()
        #: s3file attributes
        s3file = DictField()
        #: location URI
        uri = StringField()
        #: created datetime
        created = DateTimeField(default=datetime.datetime.now)
        # the actual db is defined in settings, OMEGA_MONGO_URL
        meta = {
            'db_alias': 'omega',
            'indexes': [
                # index on the identifying fields (uniqueness itself is
                # declared through unique_with on ``name``)
                {
                    'fields': ['bucket', 'prefix', 'name'],
                },
                'created',  # most recent is last, i.e. [-1]
            ]
        }

        def __eq__(self, other):
            """Two entries are equal when their ``objid`` values are equal.

            Objects that have no ``objid`` attribute (``None``, strings,
            unrelated documents) compare unequal instead of raising
            ``AttributeError``. A plain bool is returned rather than
            ``NotImplemented`` so that code negating the result still gets
            a boolean.
            """
            if not hasattr(other, 'objid'):
                return False
            return self.objid == other.objid

        def __unicode__(self):
            """Return ``Metadata(k=v,...)`` for the identifying fields."""
            fields = ('name', 'bucket', 'prefix', 'created', 'kind')
            # iterate _fields (not ``fields``) so output follows _fields order
            kwargs = ('%s=%s' % (k, getattr(self, k))
                      for k in self._fields.keys() if k in fields)
            return u"Metadata(%s)" % ','.join(kwargs)
Exemple #6
0
class ModelDO(Document):
    """Model Plain Object.

    Records are kept unique by the index declared in ``meta``, which covers
    the (engine, name, framework, version, task, dataset) combination.
    """

    # Model name
    name = StringField(required=True)
    # Supported engine enum (aka framework, e.g.: TensorFlow (0) or PyTorch (1))
    framework = IntField(required=True)
    # ONNX or TensorRT
    engine = IntField(required=True)
    # Version of the model. e.g.: `1`
    version = IntField(required=True)
    # Dataset
    dataset = StringField(required=True)
    # Model evaluation metric
    metric = DictField(required=True)
    # Model weights
    weight = FileField()
    # Model task
    task = IntField(required=True)
    # Parent Model ID
    parent_model_id = StringField()
    # inputs standard
    inputs = EmbeddedDocumentListField(IOShapeDO)
    # outputs standard
    outputs = EmbeddedDocumentListField(IOShapeDO)
    # Profile result
    profile_result = EmbeddedDocumentField(ProfileResultDO)
    # Status enum value
    status = IntField(required=True)
    # List of model status values -- NOTE(review): presumably enum values, confirm
    model_status = ListField()
    # Model provider (uploader)
    creator = StringField(required=True)
    # Creation time of this record
    create_time = DateTimeField(required=True)

    # Single compound unique index over the identifying fields
    meta = {
        'indexes': [{
            'fields':
            ('engine', 'name', 'framework', 'version', 'task', 'dataset'),
            'unique':
            True
        }]
    }
Exemple #7
0
class Metadata:
    """
    Metadata stores information about objects in OmegaStore
    """

    # NOTE THIS IS ONLY HERE FOR DOCUMENTATION PURPOSE.
    #
    # If you use this class to save a document, it will raise a NameError
    #
    # The actual Metadata class is created in make_Metadata() below.
    # Rationale: If we let mongoengine create Metadata here the class
    # is bound to a specific MongoClient instance. Using make_Metadata
    # binds the class to the specific instance that exists at the time
    # of creation. Open to better ways.

    # fields
    #: this is the name of the data; unique together with bucket and prefix
    name = StringField(unique_with=['bucket', 'prefix'])
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data
    kind = StringField(choices=MDREGISTRY.KINDS)
    #: for PANDAS_HDF and SKLEARN_JOBLIB this is the gridfile
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: for PANDAS_DFROWS this is the collection
    collection = StringField()
    #: for PYTHON_DATA this is the actual document
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    #: modified datetime
    modified = DateTimeField(default=datetime.datetime.now)
Exemple #8
0
    class Metadata(Metadata_base, Document):
        """Concrete Metadata document bound to ``db_alias`` / ``collection``.

        Combines the field declarations of ``Metadata_base`` with
        mongoengine's ``Document``.
        """
        # override db_alias in gridfile
        gridfile = FileField(db_alias=db_alias, collection_name=collection)
        # the actual db is defined at runtime
        meta = {
            'db_alias':
            db_alias,
            'strict':
            False,
            'indexes': [
                # index on the identifying fields
                {
                    'fields': ['bucket', 'prefix', 'name'],
                },
                'created',  # most recent is last, i.e. [-1]
            ]
        }

        def __new__(cls, *args, **kwargs):
            # undo the Metadata.__new__ protection
            newcls = super(Metadata, cls).__real_new__(cls)
            return newcls

        def __eq__(self, other):
            """Two entries are equal when their ``objid`` values are equal.

            Objects that have no ``objid`` attribute (``None``, strings,
            unrelated documents) compare unequal instead of raising
            ``AttributeError``. A plain bool is returned rather than
            ``NotImplemented`` so that code negating the result still gets
            a boolean.
            """
            if not hasattr(other, 'objid'):
                return False
            return self.objid == other.objid

        def __unicode__(self):
            """Return ``Metadata(k=v,...)`` for the identifying fields."""
            fields = ('name', 'bucket', 'prefix', 'created', 'kind')
            # iterate _fields (not ``fields``) so output follows _fields order
            kwargs = ('%s=%s' % (k, getattr(self, k))
                      for k in self._fields.keys() if k in fields)
            return u"Metadata(%s)" % ','.join(kwargs)

        def save(self, *args, **kwargs):
            """Update ``modified`` and save the document.

            Raises:
                AssertionError: if ``name`` is None.
            """
            # Raise explicitly: a bare ``assert`` is stripped under ``python -O``
            # and the check would silently disappear.
            if self.name is None:
                raise AssertionError("a dataset name is needed before saving")
            self.modified = datetime.datetime.now()
            # NOTE(review): the lookup starts *after* Metadata_base in the MRO,
            # presumably on purpose to bypass it -- confirm before changing.
            return super(Metadata_base, self).save(*args, **kwargs)

        def to_json(self, **kwargs):
            """Serialize to JSON, defaulting ``json_options`` to LEGACY_JSON_OPTIONS."""
            kwargs.setdefault('json_options', LEGACY_JSON_OPTIONS)
            return super().to_json(**kwargs)

        def to_dict(self):
            """Return the document as a dict via ``to_mongo().to_dict()``."""
            return self.to_mongo().to_dict()
Exemple #9
0
class ThemeFile(Document):
    """A named file attached to a theme."""

    class Proxy(object):
        """Subscript-style access to the files of one theme, keyed by name."""

        def __init__(self, theme):
            self.theme = theme

        def __getitem__(self, item):
            # First ThemeFile of this theme whose name equals ``item``
            matches = ThemeFile.objects(theme=self.theme, name=item)
            return matches.first()

    theme = ReferenceField('Theme')
    name = StringField()
    file = FileField()

    def read(self, *args, **kwargs):
        """Read from the stored file; arguments are passed through unchanged."""
        stored = self.file
        return stored.read(*args, **kwargs)

    @property
    def content_type(self):
        """Content type reported by the stored file."""
        stored = self.file
        return stored.content_type

    def delete(self, *args, **kwargs):
        """Delete the stored file (when present), then the document itself."""
        stored = self.file
        if stored:
            stored.delete()
        super(ThemeFile, self).delete(*args, **kwargs)
class CmsPage(Document):
    """
    CMS page information.

    A trailing ``*`` in a field comment is carried over from the original
    author's notes (presumably marking mandatory input -- confirm).
    """
    # Site *
    siteId = ReferenceField(CmsSite, required=True)
    # Page name *
    pageName = StringField(required=True)
    # Page alias
    pageAlias = StringField(default='', null=True)
    # Access (web) path *
    pageWebPath = StringField(required=True)
    # Physical path where the page is stored within the site; a sub-path of
    # the site's physical path *
    pagePhysicalPath = StringField(required=True)
    # Type (static / dynamic) *
    pageType = StringField(default='0')
    # Page template
    pageTemplate = StringField()
    # Statically rendered page content
    pageHtml = StringField()
    # Status
    pageStatus = StringField(null=True)
    # Creation time
    pageCreateTime = DateTimeField(default=datetime.now)
    # Template id *
    templateId = ReferenceField(CmsTemplate, required=True)
    # Page parameter list, currently unused
    pageParams = ListField()
    # Static file id
    htmlFileId = FileField(null=True)
    # Data URL
    dataUrl = StringField(null=True)

    @classmethod
    def find_by_id(cls, page_id):
        """Return the first page whose id equals ``page_id``."""
        matches = cls.objects(id=page_id)
        return matches.first()
Exemple #11
0
class BaseStudy(Document):
    """Contains data process results.

    Can act as a generic data archive. Data is pickled into per-version
    files kept in the ``fileversions`` map; ``newfile`` is a staging map
    written first and then transferred into ``fileversions``.
    """
    # ID
    name = StringField(max_length=120, required=True, unique=True)

    # Data Inputs
    parents = MapField(ReferenceField('BaseStudy'))  # Precursor data used by the Analysis
    params = DictField()  # Processing parameters.

    # Data Outputs
    newfile = MapField(FileField())
    fileversions = MapField(FileField())
    versiondefault = StringField(default='default')
    _timestamp = StringField()
    valid_age = IntField(default=0)  # Zero means always valid
    valid_type = StringField(choices=['market', 'always'], default='market')

    # Run Status
    status = StringField(choices=['Idle', 'Running'], default='Idle')

    # Meta
    notes = MapField(ListField(StringField()))
    schema_version = IntField(default=1)
    meta = {
        'strict': False,
        'collection': 'study',
        'allow_inheritance': True,
        }

    def __repr__(self):
        return f"BaseStudy: {self.name}"

    # pylint: disable=no-member
    def clean(self):
        """Clean attributes.

        Removes parents that are still ``DBRef`` objects, refreshes the
        ``_timestamp`` display string and finally calls ``subclean()``.
        """
        # Parents
        # Iterate over a snapshot: deleting from the mapping while iterating
        # its live view raises RuntimeError on Python 3.
        for key, parent in list(self.parents.items()):
            if isinstance(parent, DBRef):
                del self.parents[key]
        # Timestamp display
        try:
            self._timestamp = self.timestamp.format()
        except AttributeError:
            # self.timestamp is None when no timestamp could be determined
            self._timestamp = None
        # Subclassed cleaning
        self.subclean()

    def subclean(self):
        """Cleaning operations for subclasses."""
        pass

    ## Methods defining the Study ##

    def rename(self, newname):
        """Rename the Study and save it."""
        self.name = newname
        self.save()

    ## Methods related to scheduling runs ##

    @property
    def timestamp(self):
        """Timestamp of the default version's file (see ``get_timestamp``)."""
        return self.get_timestamp(self.versiondefault)

    def get_timestamp(self, version):
        """Return the upload date of ``version`` converted to ``Config.TZ``.

        Returns ``None`` when the timestamp cannot be determined (e.g. the
        version has no file yet).
        """
        try:
            recent_file = self.fileversions.get(version)
            return arrow.get(recent_file.uploadDate).to(Config.TZ)
        except Exception:
            # Best effort by design, but no longer swallows
            # KeyboardInterrupt / SystemExit like the former bare except.
            return None

    @property
    def valid(self):
        """Check if the Study data is still valid.

        The study is valid only when its own data is current and every
        parent is valid as well.
        """
        # Check the age of the data
        if not self.timestamp:
            current = False
        elif self.valid_type == 'market':
            current = self.market_valid(self.timestamp)
        elif self.valid_type == 'always' or self.valid_age == 0:
            current = True
        else:
            current = arrow.now(Config.TZ) - self.timestamp < dt.timedelta(days=self.valid_age)
        # Check if the parents are valid too
        if current:
            for parent in self.parents.values():
                if not parent.valid:
                    current = False
                    break
        return current

    @staticmethod
    def market_valid(timestamp):
        """Check if the market has or hasn't progressed since the last timestamp.

        Returns ``False`` when the market is currently open or a market day
        boundary lies between ``timestamp`` and now, ``True`` otherwise.
        """
        now = arrow.now(Config.TZ)
        schedule, nyse = market_schedule(timestamp, now)
        is_open = nyse.open_at_time(schedule, now.datetime)  # Market currently open
        open_close_dt = pd.DataFrame([], index=schedule.values.flatten())  ## Market day boundaries
        if is_open:  ## If the market is open, the data should be refreshed
            return False
        elif len(open_close_dt[timestamp.datetime:now.datetime]) > 0: ## A market day boundary has passed
            return False
        else:  ## The market hasn't changed. The data is still valid.
            return True

    @staticmethod
    def alert_overwrite(timestamp, now):
        """Check if the previous alert should be overwritten.

        Returns ``False`` when ``timestamp`` is None; otherwise ``True`` when
        no market open lies between ``timestamp`` and ``now``.
        """
        if timestamp is None:
            return False
        schedule, _ = market_schedule(timestamp, now)
        ## New alert only if a new market day has begun. Otherwise, overwrite.
        open_dt = pd.DataFrame([], index=schedule['market_open'])
        return len(open_dt[timestamp.datetime:now.datetime]) == 0

    @property
    def dependencies(self):
        """Create a dictionary of dependencies.

        Maps the id of this study, and recursively of every ancestor, to the
        list of its parents' ids (all as strings).
        """
        deps = {str(self.id): [str(parent.id) for parent in self.parents.values()]}
        for parent in self.parents.values():
            deps.update(parent.dependencies)
        return deps

    def run_if(self, function=None):
        """Run the Study if it's no longer valid."""
        if not self.valid:
            self.run(function)

    def run(self, function=None, force=False):
        """Run the Study process on the inputs and return any alerts.

        Always raises here; subclasses are expected to override it.
        """
        raise Exception("Cannot run from BaseStudy objects.")

    ## Methods for handling inputs ##

    def add_parents(self, newparents):
        """Add all of the parents in the given dict of ids.

        Values are looked up as study names; if that lookup raises
        ``InvalidDocument`` the values are used as given.
        """
        try:
            parent_objects = {key: BaseStudy.objects(name=val).get() for key, val in newparents.items()}
        except InvalidDocument:
            parent_objects = newparents
        self.parents.update(parent_objects)
        self.save()

    def set_parents(self, newparents):
        """Overwrite the existing parents with new ones."""
        self.parents = {}
        self.add_parents(newparents)

    def add_params(self, newparams):
        """Add all of the params in the given dict."""
        self.params.update(newparams)
        self.save()

    def remove_inputs(self, inputs):
        """Remove all of the inputs in the given iterable of names."""
        for key in inputs:
            self.parents.pop(key, None)
            self.params.pop(key, None)
        self.save()

    ## Methods for handling the saved data ##

    @property
    def version(self):
        """Currently selected version label; defaults to ``versiondefault``."""
        return getattr(self, '_version', self.versiondefault)

    @version.setter
    def version(self, label):
        self._version = label

    @property
    def all_versions(self):
        """List of all version labels that have a stored file."""
        return list(self.fileversions.keys())

    @property
    def data(self):
        """Preprocess the data field to return the data in a usable format.

        Returns the unpickled content of the current version's file, or
        ``None`` when it cannot be read.
        """
        if self.newfile.get(self.version):
            self.transfer_file(self.newfile, self.fileversions)
        try:
            file_obj = self.fileversions.get(self.version).get()
            result = file_obj.read()
            file_obj.seek(0)
            # SECURITY NOTE: pickle.loads executes arbitrary code from the
            # payload; this is only safe while the store holds trusted data.
            return pickle.loads(result)
        except Exception:
            # Best effort by design, but no longer swallows
            # KeyboardInterrupt / SystemExit like the former bare except.
            return None

    @data.setter
    def data(self, newdata):
        """Process the data for storage; ``None`` removes the stored files."""
        if newdata is None:
            self.remove_files()
        else:
            self.write_version(self.newfile, newdata)
            self.transfer_file(self.newfile, self.fileversions)

    def write_to(self, field, newdata):
        """Write data to a FileField (pickled) and save the document."""
        field.new_file()
        field.write(pickle.dumps(newdata))
        field.close()
        self.save()

    def write_version(self, field, newdata):
        """Write data into a mapped FileField."""
        fileslot = self.get_fileslot(field)
        self.write_to(fileslot, newdata)

    def get_fileslot(self, field):
        """Get an existing fileslot in a mapfield, or create it."""
        fileslot = field.get(self.version, GridFSProxy())
        field[self.version] = fileslot
        return fileslot

    def copy_file(self, filesrc, filedest):
        """Copy the data from filesrc to filedest."""
        newfile = filesrc.read()
        filedest.replace(newfile)
        self.save()

    def transfer_file(self, filesrc, filedest):
        """Transfer a file between FileFields, possibly within a MapField."""
        try:
            # A map source: take the current version's entry out of it
            filesrc = filesrc.pop(self.version, None)
        except AttributeError:
            # Not a mapping: filesrc is used as the file itself
            pass
        if isinstance(filedest, dict):
            filedest = self.get_fileslot(filedest)
        self.copy_file(filesrc, filedest)
        filesrc.delete()
        self.save()

    def remove_file(self, field):
        """Remove a file version from a MapField.

        Raises:
            KeyError: if the current version has no entry in ``field``.
        """
        field[self.version].delete()
        del field[self.version]
        self.save()

    def remove_files(self):
        """Remove the data."""
        for field in (self.fileversions, self.newfile):
            try:
                self.remove_file(field)
            except KeyError:
                pass

    def rename_data(self, oldname, newname):
        """Rename the data file.

        An existing file stored under ``newname`` is deleted and replaced.

        Raises:
            KeyError: if ``oldname`` has no stored file; nothing is modified
                in that case.
        """
        # Look the source up first so a missing ``oldname`` fails before the
        # destination file is destroyed.
        source = self.fileversions[oldname]
        if oldname == newname:
            # Renaming onto itself would delete the very file being kept.
            return
        try:
            self.fileversions[newname].delete()
        except KeyError:
            pass
        self.fileversions[newname] = source
        del self.fileversions[oldname]
        self.save()

    def add_note(self, title='default', text=None):
        """Add a note to the notes field.

        With no ``text`` only the (possibly empty) title entry is ensured.
        """
        entry = self.notes.get(title, [])
        if text:
            entry.append(text)
        self.notes[title] = entry
        self.save()

    def del_note(self, title='default', index=None):
        """Delete the specified note.

        With ``index`` None the whole title is removed; otherwise only the
        note at ``index`` under that title.
        """
        if index is None:
            self.notes.pop(title, None)
        else:
            entry = self.notes.get(title, [])
            entry.pop(index)
        self.save()

    def read_notes(self, title=None):
        """Read the notes.

        Prints the notes of ``title``, or of every title (in reversed key
        order) when ``title`` is None.
        """
        if title is None:
            for key in reversed(self.notes.keys()):
                self.read_notes(key)
        else:
            entry = [f"{i}: {note}" for i, note in enumerate(self.notes.get(title, []))]
            print(f"{title}\n\t" + "\n\t".join(entry))
Exemple #12
0
class Metadata(Document):
    """
    Metadata stores information about objects in OmegaStore
    """
    # built-in kinds of data
    PANDAS_DFROWS = 'pandas.dfrows'  # dataframe
    PANDAS_SEROWS = 'pandas.serows'  # series
    PANDAS_HDF = 'pandas.hdf'
    PYTHON_DATA = 'python.data'
    PANDAS_DFGROUP = 'pandas.dfgroup'
    SKLEARN_JOBLIB = 'sklearn.joblib'
    OMEGAML_JOBS = 'script.ipynb'
    SPARK_MLLIB = 'spark.mllib'
    OMEGAML_RUNNING_JOBS = 'job.run'
    #: the list of accepted data types. extend using OmegaStore.register_backend
    KINDS = [
        PANDAS_DFROWS,
        PANDAS_SEROWS,
        PANDAS_HDF,
        PYTHON_DATA,
        SKLEARN_JOBLIB,
        PANDAS_DFGROUP,
        OMEGAML_JOBS,
        OMEGAML_RUNNING_JOBS,
        SPARK_MLLIB,
    ]
    # document fields
    #: name of the data
    name = StringField()
    #: bucket
    bucket = StringField()
    #: prefix
    prefix = StringField()
    #: kind of data, one of KINDS
    kind = StringField(choices=KINDS)
    #: gridfile, used for PANDAS_HDF and SKLEARN_JOBLIB
    gridfile = FileField(
        db_alias='omega',
        collection_name=settings().OMEGA_MONGO_COLLECTION)
    #: collection, used for PANDAS_DFROWS
    collection = StringField()
    #: the actual document, used for PYTHON_DATA
    objid = ObjectIdField()
    #: omegaml technical attributes, e.g. column indices
    kind_meta = DictField()
    #: customer-defined other meta attributes
    attributes = DictField()
    #: s3file attributes
    s3file = DictField()
    #: location URI
    uri = StringField()
    #: created datetime
    created = DateTimeField(default=datetime.datetime.now)
    # the actual db is defined in settings, OMEGA_MONGO_URL
    meta = {
        'db_alias': 'omega',
        'indexes': [
            # index on the identifying fields
            {
                'fields': ['bucket', 'prefix', 'name'],
            },
            'created',  # most recent is last, i.e. [-1]
        ]
    }

    def __unicode__(self):
        """Return ``Metadata(k=v,...)`` for bucket, prefix, created and kind."""
        shown = ('bucket', 'prefix', 'created', 'kind')
        # walk _fields so the output follows _fields order, not ``shown``
        parts = ['%s=%s' % (field_name, getattr(self, field_name))
                 for field_name in self._fields.keys()
                 if field_name in shown]
        return u"Metadata(%s)" % ','.join(parts)